diff options
Diffstat (limited to 'fs')
149 files changed, 3354 insertions, 2394 deletions
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 9cc18775b83..2ff622f6f54 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -121,7 +121,7 @@ struct adfs_discmap { /* Inode stuff */ struct inode *adfs_iget(struct super_block *sb, struct object_info *obj); -int adfs_write_inode(struct inode *inode,int unused); +int adfs_write_inode(struct inode *inode, struct writeback_control *wbc); int adfs_notify_change(struct dentry *dentry, struct iattr *attr); /* map.c */ diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 3f57ce4bee5..0f5e3097813 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -9,6 +9,7 @@ */ #include <linux/smp_lock.h> #include <linux/buffer_head.h> +#include <linux/writeback.h> #include "adfs.h" /* @@ -360,7 +361,7 @@ out: * The adfs-specific inode data has already been updated by * adfs_notify_change() */ -int adfs_write_inode(struct inode *inode, int wait) +int adfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct super_block *sb = inode->i_sb; struct object_info obj; @@ -375,7 +376,7 @@ int adfs_write_inode(struct inode *inode, int wait) obj.attr = ADFS_I(inode)->attr; obj.size = inode->i_size; - ret = adfs_dir_update(sb, &obj, wait); + ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL); unlock_kernel(); return ret; } diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 0e40caaba45..861dae68ac1 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -175,7 +175,8 @@ extern void affs_delete_inode(struct inode *inode); extern void affs_clear_inode(struct inode *inode); extern struct inode *affs_iget(struct super_block *sb, unsigned long ino); -extern int affs_write_inode(struct inode *inode, int); +extern int affs_write_inode(struct inode *inode, + struct writeback_control *wbc); extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type); /* file.c */ diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 3c4ec7d864c..c9744d771d9 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -166,7 +166,7 @@ bad_inode: } int -affs_write_inode(struct inode *inode, int unused) +affs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct super_block *sb = inode->i_sb; struct buffer_head *bh; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 6ece2a13bf7..c54dad4e606 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -733,7 +733,6 @@ extern int afs_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata); extern int afs_writepage(struct page *, struct writeback_control *); extern int afs_writepages(struct address_space *, struct writeback_control *); -extern int afs_write_inode(struct inode *, int); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); diff --git a/fs/afs/super.c b/fs/afs/super.c index e1ea1c240b6..14f6431598a 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -48,7 +48,6 @@ struct file_system_type afs_fs_type = { static const struct super_operations afs_super_ops = { .statfs = afs_statfs, .alloc_inode = afs_alloc_inode, - .write_inode = afs_write_inode, .destroy_inode = afs_destroy_inode, .clear_inode = afs_clear_inode, .put_super = afs_put_super, diff --git a/fs/afs/write.c b/fs/afs/write.c index 5e15a21dbf9..3bed54a294d 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -585,27 +585,6 @@ int afs_writepages(struct address_space *mapping, } /* - * write an inode back - */ -int afs_write_inode(struct inode *inode, int sync) -{ - struct afs_vnode *vnode = AFS_FS_I(inode); - int ret; - - _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); - - ret = 0; - if (sync) { - ret = filemap_fdatawait(inode->i_mapping); - if (ret < 0) - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - } - - _leave(" = %d", ret); - return ret; -} - -/* * completion of write to server */ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) diff --git a/fs/attr.c b/fs/attr.c index 96d394bdadd..0a6ea54cde7 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -12,7 +12,6 @@ #include <linux/capability.h> #include <linux/fsnotify.h> #include <linux/fcntl.h> -#include <linux/quotaops.h> #include <linux/security.h> /* Taken over from the old code... */ @@ -212,14 +211,8 @@ int notify_change(struct dentry * dentry, struct iattr * attr) error = inode->i_op->setattr(dentry, attr); } else { error = inode_change_ok(inode, attr); - if (!error) { - if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) - error = vfs_dq_transfer(inode, attr) ? - -EDQUOT : 0; - if (!error) - error = inode_setattr(inode, attr); - } + if (!error) + error = inode_setattr(inode, attr); } if (ia_valid & ATTR_SIZE) diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 8f3d9fd8960..f22a7d3dc36 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -15,6 +15,7 @@ #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/vfs.h> +#include <linux/writeback.h> #include <asm/uaccess.h> #include "bfs.h" @@ -98,7 +99,7 @@ error: return ERR_PTR(-EIO); } -static int bfs_write_inode(struct inode *inode, int wait) +static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct bfs_sb_info *info = BFS_SB(inode->i_sb); unsigned int ino = (u16)inode->i_ino; @@ -147,7 +148,7 @@ static int bfs_write_inode(struct inode *inode, int wait) di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); mark_buffer_dirty(bh); - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) err = -EIO; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2aa8ec6a098..8b5cfdd4bfc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2326,7 +2326,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); void btrfs_put_inode(struct inode *inode); -int btrfs_write_inode(struct inode *inode, int wait); +int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); void btrfs_dirty_inode(struct inode *inode); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4deb280f896..c41db6d45ab 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3968,7 +3968,7 @@ err: return ret; } -int btrfs_write_inode(struct inode *inode, int wait) +int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; @@ -3977,7 +3977,7 @@ int btrfs_write_inode(struct inode *inode, int wait) if (root->fs_info->btree_inode == inode) return 0; - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { trans = btrfs_join_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 59b8bf2825c..8442e353309 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -261,7 +261,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata); extern struct inode *exofs_iget(struct super_block *, unsigned long); struct inode *exofs_new_inode(struct inode *, int); -extern int exofs_write_inode(struct inode *, int); +extern int exofs_write_inode(struct inode *, struct writeback_control *wbc); extern void exofs_delete_inode(struct inode *); /* dir.c: */ diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 5514f3c2c2f..a17e4b733e3 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1280,9 +1280,9 @@ out: return ret; } -int exofs_write_inode(struct inode *inode, int wait) +int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) { - return exofs_update_inode(inode, wait); + return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); } /* diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 7f8d2e5a7ea..1d081f0cfec 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -570,7 +570,7 @@ do_more: error_return: brelse(bitmap_bh); release_blocks(sb, freed); - vfs_dq_free_block(inode, freed); + dquot_free_block(inode, freed); } /** @@ -1236,6 +1236,7 @@ ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal, unsigned short windowsz = 0; unsigned long ngroups; unsigned long num = *count; + int ret; *errp = -ENOSPC; sb = inode->i_sb; @@ -1247,8 +1248,9 @@ ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal, /* * Check quota for allocation of this block. */ - if (vfs_dq_alloc_block(inode, num)) { - *errp = -EDQUOT; + ret = dquot_alloc_block(inode, num); + if (ret) { + *errp = ret; return 0; } @@ -1409,7 +1411,7 @@ allocated: *errp = 0; brelse(bitmap_bh); - vfs_dq_free_block(inode, *count-num); + dquot_free_block(inode, *count-num); *count = num; return ret_block; @@ -1420,7 +1422,7 @@ out: * Undo the block allocation */ if (!performed_allocation) - vfs_dq_free_block(inode, *count); + dquot_free_block(inode, *count); brelse(bitmap_bh); return 0; } diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 061914add3c..0b038e47ad2 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -118,7 +118,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned); /* inode.c */ extern struct inode *ext2_iget (struct super_block *, unsigned long); -extern int ext2_write_inode (struct inode *, int); +extern int ext2_write_inode (struct inode *, struct writeback_control *); extern void ext2_delete_inode (struct inode *); extern int ext2_sync_inode (struct inode *); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 586e3589d4c..5d198d0697f 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -20,6 +20,7 @@ #include <linux/time.h> #include <linux/pagemap.h> +#include <linux/quotaops.h> #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -70,7 +71,7 @@ const struct file_operations ext2_file_operations = { .compat_ioctl = ext2_compat_ioctl, #endif .mmap = generic_file_mmap, - .open = generic_file_open, + .open = dquot_file_open, .release = ext2_release_file, .fsync = ext2_fsync, .splice_read = generic_file_splice_read, @@ -87,7 +88,7 @@ const struct file_operations ext2_xip_file_operations = { .compat_ioctl = ext2_compat_ioctl, #endif .mmap = xip_file_mmap, - .open = generic_file_open, + .open = dquot_file_open, .release = ext2_release_file, .fsync = ext2_fsync, }; diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 15387c9c17d..ad7d572ee8d 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -121,8 +121,8 @@ void ext2_free_inode (struct inode * inode) if (!is_bad_inode(inode)) { /* Quota is already initialized in iput() */ ext2_xattr_delete_inode(inode); - vfs_dq_free_inode(inode); - vfs_dq_drop(inode); + dquot_free_inode(inode); + dquot_drop(inode); } es = EXT2_SB(sb)->s_es; @@ -586,10 +586,10 @@ got: goto fail_drop; } - if (vfs_dq_alloc_inode(inode)) { - err = -EDQUOT; + dquot_initialize(inode); + err = dquot_alloc_inode(inode); + if (err) goto fail_drop; - } err = ext2_init_acl(inode, dir); if (err) @@ -605,10 +605,10 @@ got: return inode; fail_free_drop: - vfs_dq_free_inode(inode); + dquot_free_inode(inode); fail_drop: - vfs_dq_drop(inode); + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; unlock_new_inode(inode); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 71b032c65a0..fc13cc119aa 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -41,6 +41,8 @@ MODULE_AUTHOR("Remy Card and others"); MODULE_DESCRIPTION("Second Extended Filesystem"); MODULE_LICENSE("GPL"); +static int __ext2_write_inode(struct inode *inode, int do_sync); + /* * Test whether an inode is a fast symlink. */ @@ -58,13 +60,15 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode) */ void ext2_delete_inode (struct inode * inode) { + if (!is_bad_inode(inode)) + dquot_initialize(inode); truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) goto no_delete; EXT2_I(inode)->i_dtime = get_seconds(); mark_inode_dirty(inode); - ext2_write_inode(inode, inode_needs_sync(inode)); + __ext2_write_inode(inode, inode_needs_sync(inode)); inode->i_size = 0; if (inode->i_blocks) @@ -1335,7 +1339,7 @@ bad_inode: return ERR_PTR(ret); } -int ext2_write_inode(struct inode *inode, int do_sync) +static int __ext2_write_inode(struct inode *inode, int do_sync) { struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; @@ -1440,6 +1444,11 @@ int ext2_write_inode(struct inode *inode, int do_sync) return err; } +int ext2_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int ext2_sync_inode(struct inode *inode) { struct writeback_control wbc = { @@ -1457,9 +1466,12 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) error = inode_change_ok(inode, iattr); if (error) return error; + + if (iattr->ia_valid & ATTR_SIZE) + dquot_initialize(inode); if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { - error = vfs_dq_transfer(inode, iattr) ? -EDQUOT : 0; + error = dquot_transfer(inode, iattr); if (error) return error; } diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index dd7175ce560..71efb0e9a3f 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -31,6 +31,7 @@ */ #include <linux/pagemap.h> +#include <linux/quotaops.h> #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -99,24 +100,27 @@ struct dentry *ext2_get_parent(struct dentry *child) */ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) { - struct inode * inode = ext2_new_inode (dir, mode); - int err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext2_file_inode_operations; - if (ext2_use_xip(inode->i_sb)) { - inode->i_mapping->a_ops = &ext2_aops_xip; - inode->i_fop = &ext2_xip_file_operations; - } else if (test_opt(inode->i_sb, NOBH)) { - inode->i_mapping->a_ops = &ext2_nobh_aops; - inode->i_fop = &ext2_file_operations; - } else { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_file_operations; - } - mark_inode_dirty(inode); - err = ext2_add_nondir(dentry, inode); + struct inode *inode; + + dquot_initialize(dir); + + inode = ext2_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &ext2_file_inode_operations; + if (ext2_use_xip(inode->i_sb)) { + inode->i_mapping->a_ops = &ext2_aops_xip; + inode->i_fop = &ext2_xip_file_operations; + } else if (test_opt(inode->i_sb, NOBH)) { + inode->i_mapping->a_ops = &ext2_nobh_aops; + inode->i_fop = &ext2_file_operations; + } else { + inode->i_mapping->a_ops = &ext2_aops; + inode->i_fop = &ext2_file_operations; } - return err; + mark_inode_dirty(inode); + return ext2_add_nondir(dentry, inode); } static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) @@ -127,6 +131,8 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_ if (!new_valid_dev(rdev)) return -EINVAL; + dquot_initialize(dir); + inode = ext2_new_inode (dir, mode); err = PTR_ERR(inode); if (!IS_ERR(inode)) { @@ -151,6 +157,8 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out; + dquot_initialize(dir); + inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) @@ -194,6 +202,8 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, if (inode->i_nlink >= EXT2_LINK_MAX) return -EMLINK; + dquot_initialize(dir); + inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); atomic_inc(&inode->i_count); @@ -216,6 +226,8 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (dir->i_nlink >= EXT2_LINK_MAX) goto out; + dquot_initialize(dir); + inode_inc_link_count(dir); inode = ext2_new_inode (dir, S_IFDIR | mode); @@ -262,6 +274,8 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry) struct page * page; int err = -ENOENT; + dquot_initialize(dir); + de = ext2_find_entry (dir, &dentry->d_name, &page); if (!de) goto out; @@ -304,6 +318,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, struct ext2_dir_entry_2 * old_de; int err = -ENOENT; + dquot_initialize(old_dir); + dquot_initialize(new_dir); + old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page); if (!old_de) goto out; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index f9cb54a585c..42e4a303b67 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -194,6 +194,8 @@ static void destroy_inodecache(void) static void ext2_clear_inode(struct inode *inode) { struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info; + + dquot_drop(inode); ext2_discard_reservation(inode); EXT2_I(inode)->i_block_alloc_info = NULL; if (unlikely(rsv)) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 904f00642f8..e44dc92609b 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -644,8 +644,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, the inode. */ ea_bdebug(new_bh, "reusing block"); - error = -EDQUOT; - if (vfs_dq_alloc_block(inode, 1)) { + error = dquot_alloc_block(inode, 1); + if (error) { unlock_buffer(new_bh); goto cleanup; } @@ -702,7 +702,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, * as if nothing happened and cleanup the unused block */ if (error && error != -ENOSPC) { if (new_bh && new_bh != old_bh) - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); goto cleanup; } } else @@ -734,7 +734,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, le32_add_cpu(&HDR(old_bh)->h_refcount, -1); if (ce) mb_cache_entry_release(ce); - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); mark_buffer_dirty(old_bh); ea_bdebug(old_bh, "refcount now=%d", le32_to_cpu(HDR(old_bh)->h_refcount)); @@ -797,7 +797,7 @@ ext2_xattr_delete_inode(struct inode *inode) mark_buffer_dirty(bh); if (IS_SYNC(inode)) sync_dirty_buffer(bh); - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); } EXT2_I(inode)->i_file_acl = 0; diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 27967f92e82..161da2d3f89 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -676,7 +676,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode, } ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); if (dquot_freed_blocks) - vfs_dq_free_block(inode, dquot_freed_blocks); + dquot_free_block(inode, dquot_freed_blocks); return; } @@ -1502,8 +1502,9 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, /* * Check quota for allocation of this block. */ - if (vfs_dq_alloc_block(inode, num)) { - *errp = -EDQUOT; + err = dquot_alloc_block(inode, num); + if (err) { + *errp = err; return 0; } @@ -1713,7 +1714,7 @@ allocated: *errp = 0; brelse(bitmap_bh); - vfs_dq_free_block(inode, *count-num); + dquot_free_block(inode, *count-num); *count = num; return ret_block; @@ -1728,7 +1729,7 @@ out: * Undo the block allocation */ if (!performed_allocation) - vfs_dq_free_block(inode, *count); + dquot_free_block(inode, *count); brelse(bitmap_bh); return 0; } diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 388bbdfa0b4..f55df0e61cb 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -21,6 +21,7 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd.h> +#include <linux/quotaops.h> #include <linux/ext3_fs.h> #include <linux/ext3_jbd.h> #include "xattr.h" @@ -33,9 +34,9 @@ */ static int ext3_release_file (struct inode * inode, struct file * filp) { - if (EXT3_I(inode)->i_state & EXT3_STATE_FLUSH_ON_CLOSE) { + if (ext3_test_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE)) { filemap_flush(inode->i_mapping); - EXT3_I(inode)->i_state &= ~EXT3_STATE_FLUSH_ON_CLOSE; + ext3_clear_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); } /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && @@ -62,7 +63,7 @@ const struct file_operations ext3_file_operations = { .compat_ioctl = ext3_compat_ioctl, #endif .mmap = generic_file_mmap, - .open = generic_file_open, + .open = dquot_file_open, .release = ext3_release_file, .fsync = ext3_sync_file, .splice_read = generic_file_splice_read, diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index b3999128513..ef9008b885b 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -123,10 +123,10 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) * Note: we must free any quota before locking the superblock, * as writing the quota to disk may need the lock as well. */ - vfs_dq_init(inode); + dquot_initialize(inode); ext3_xattr_delete_inode(handle, inode); - vfs_dq_free_inode(inode); - vfs_dq_drop(inode); + dquot_free_inode(inode); + dquot_drop(inode); is_directory = S_ISDIR(inode->i_mode); @@ -588,10 +588,10 @@ got: sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; ret = inode; - if (vfs_dq_alloc_inode(inode)) { - err = -EDQUOT; + dquot_initialize(inode); + err = dquot_alloc_inode(inode); + if (err) goto fail_drop; - } err = ext3_init_acl(handle, inode, dir); if (err) @@ -619,10 +619,10 @@ really_out: return ret; fail_free_drop: - vfs_dq_free_inode(inode); + dquot_free_inode(inode); fail_drop: - vfs_dq_drop(inode); + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; unlock_new_inode(inode); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 455e6e6e5cb..7f920b7263a 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -196,6 +196,9 @@ void ext3_delete_inode (struct inode * inode) { handle_t *handle; + if (!is_bad_inode(inode)) + dquot_initialize(inode); + truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) @@ -1378,7 +1381,7 @@ static int ext3_journalled_write_end(struct file *file, */ if (pos + len > inode->i_size && ext3_can_truncate(inode)) ext3_orphan_add(handle, inode); - EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; + ext3_set_inode_state(inode, EXT3_STATE_JDATA); if (inode->i_size > EXT3_I(inode)->i_disksize) { EXT3_I(inode)->i_disksize = inode->i_size; ret2 = ext3_mark_inode_dirty(handle, inode); @@ -1417,7 +1420,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block) journal_t *journal; int err; - if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) { + if (ext3_test_inode_state(inode, EXT3_STATE_JDATA)) { /* * This is a REALLY heavyweight approach, but the use of * bmap on dirty files is expected to be extremely rare: @@ -1436,7 +1439,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block) * everything they get. */ - EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA; + ext3_clear_inode_state(inode, EXT3_STATE_JDATA); journal = EXT3_JOURNAL(inode); journal_lock_updates(journal); err = journal_flush(journal); @@ -1528,6 +1531,7 @@ static int ext3_ordered_writepage(struct page *page, int err; J_ASSERT(PageLocked(page)); + WARN_ON_ONCE(IS_RDONLY(inode)); /* * We give up here if we're reentered, because it might be for a @@ -1600,6 +1604,9 @@ static int ext3_writeback_writepage(struct page *page, int ret = 0; int err; + J_ASSERT(PageLocked(page)); + WARN_ON_ONCE(IS_RDONLY(inode)); + if (ext3_journal_current_handle()) goto out_fail; @@ -1642,6 +1649,9 @@ static int ext3_journalled_writepage(struct page *page, int ret = 0; int err; + J_ASSERT(PageLocked(page)); + WARN_ON_ONCE(IS_RDONLY(inode)); + if (ext3_journal_current_handle()) goto no_write; @@ -1670,7 +1680,7 @@ static int ext3_journalled_writepage(struct page *page, PAGE_CACHE_SIZE, NULL, write_end_fn); if (ret == 0) ret = err; - EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; + ext3_set_inode_state(inode, EXT3_STATE_JDATA); unlock_page(page); } else { /* @@ -1785,8 +1795,9 @@ retry: handle = ext3_journal_start(inode, 2); if (IS_ERR(handle)) { /* This is really bad luck. We've written the data - * but cannot extend i_size. Bail out and pretend - * the write failed... */ + * but cannot extend i_size. Truncate allocated blocks + * and pretend the write failed... */ + ext3_truncate(inode); ret = PTR_ERR(handle); goto out; } @@ -2402,7 +2413,7 @@ void ext3_truncate(struct inode *inode) goto out_notrans; if (inode->i_size == 0 && ext3_should_writeback_data(inode)) - ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE; + ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); /* * We have to lock the EOF page here, because lock_page() nests @@ -2721,7 +2732,7 @@ int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) { /* We have all inode data except xattrs in memory here. */ return __ext3_get_inode_loc(inode, iloc, - !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)); + !ext3_test_inode_state(inode, EXT3_STATE_XATTR)); } void ext3_set_inode_flags(struct inode *inode) @@ -2893,7 +2904,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC)) - ei->i_state |= EXT3_STATE_XATTR; + ext3_set_inode_state(inode, EXT3_STATE_XATTR); } } else ei->i_extra_isize = 0; @@ -2955,7 +2966,7 @@ again: /* For fields not not tracking in the in-memory inode, * initialise them to zero for new inodes. */ - if (ei->i_state & EXT3_STATE_NEW) + if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); ext3_get_inode_flags(ei); @@ -3052,7 +3063,7 @@ again: rc = ext3_journal_dirty_metadata(handle, bh); if (!err) err = rc; - ei->i_state &= ~EXT3_STATE_NEW; + ext3_clear_inode_state(inode, EXT3_STATE_NEW); atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); out_brelse: @@ -3096,7 +3107,7 @@ out_brelse: * `stuff()' is running, and the new i_size will be lost. Plus the inode * will no longer be on the superblock's dirty inode list. */ -int ext3_write_inode(struct inode *inode, int wait) +int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) { if (current->flags & PF_MEMALLOC) return 0; @@ -3107,7 +3118,7 @@ int ext3_write_inode(struct inode *inode, int wait) return -EIO; } - if (!wait) + if (wbc->sync_mode != WB_SYNC_ALL) return 0; return ext3_force_commit(inode->i_sb); @@ -3140,6 +3151,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; + if (ia_valid & ATTR_SIZE) + dquot_initialize(inode); if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { handle_t *handle; @@ -3152,7 +3165,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) error = PTR_ERR(handle); goto err_out; } - error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; + error = dquot_transfer(inode, attr); if (error) { ext3_journal_stop(handle); return error; @@ -3237,7 +3250,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode) ret = 2 * (bpp + indirects) + 2; #ifdef CONFIG_QUOTA - /* We know that structure was already allocated during vfs_dq_init so + /* We know that structure was already allocated during dquot_initialize so * we will be updating only the data blocks + inodes */ ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); #endif @@ -3328,7 +3341,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) * i_size has been changed by generic_commit_write() and we thus need * to include the updated inode in the current transaction. * - * Also, vfs_dq_alloc_space() will always dirty the inode when blocks + * Also, dquot_alloc_space() will always dirty the inode when blocks * are allocated to the file. * * If the inode is marked synchronous, we don't honour that here - doing diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 7b0e44f7d66..ee184084ca4 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1696,6 +1696,8 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, struct inode * inode; int err, retries = 0; + dquot_initialize(dir); + retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -1730,6 +1732,8 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; + dquot_initialize(dir); + retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -1766,6 +1770,8 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (dir->i_nlink >= EXT3_LINK_MAX) return -EMLINK; + dquot_initialize(dir); + retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -2060,7 +2066,9 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go in * separate transaction */ - vfs_dq_init(dentry->d_inode); + dquot_initialize(dir); + dquot_initialize(dentry->d_inode); + handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2119,7 +2127,9 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go * in separate transaction */ - vfs_dq_init(dentry->d_inode); + dquot_initialize(dir); + dquot_initialize(dentry->d_inode); + handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2174,6 +2184,8 @@ static int ext3_symlink (struct inode * dir, if (l > dir->i_sb->s_blocksize) return -ENAMETOOLONG; + dquot_initialize(dir); + retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + @@ -2228,6 +2240,9 @@ static int ext3_link (struct dentry * old_dentry, if (inode->i_nlink >= EXT3_LINK_MAX) return -EMLINK; + + dquot_initialize(dir); + /* * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing * otherwise has the potential to corrupt the orphan inode list. @@ -2278,12 +2293,15 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, struct ext3_dir_entry_2 * old_de, * new_de; int retval, flush_file = 0; + dquot_initialize(old_dir); + dquot_initialize(new_dir); + old_bh = new_bh = dir_bh = NULL; /* Initialize quotas before so that eventual writes go * in separate transaction */ if (new_dentry->d_inode) - vfs_dq_init(new_dentry->d_inode); + dquot_initialize(new_dentry->d_inode); handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index afa2b569da1..e844accbf55 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -181,7 +181,7 @@ static void ext3_handle_error(struct super_block *sb) if (!test_opt (sb, ERRORS_CONT)) { journal_t *journal = EXT3_SB(sb)->s_journal; - EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; + set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); if (journal) journal_abort(journal, -EIO); } @@ -296,7 +296,7 @@ void ext3_abort (struct super_block * sb, const char * function, "error: remounting filesystem read-only"); EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; sb->s_flags |= MS_RDONLY; - EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; + set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); if (EXT3_SB(sb)->s_journal) journal_abort(EXT3_SB(sb)->s_journal, -EIO); } @@ -528,6 +528,8 @@ static void destroy_inodecache(void) static void ext3_clear_inode(struct inode *inode) { struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; + + dquot_drop(inode); ext3_discard_reservation(inode); EXT3_I(inode)->i_block_alloc_info = NULL; if (unlikely(rsv)) @@ -562,10 +564,10 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl if (sbi->s_qf_names[GRPQUOTA]) seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); - if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) + if (test_opt(sb, USRQUOTA)) seq_puts(seq, ",usrquota"); - if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) + if (test_opt(sb, GRPQUOTA)) seq_puts(seq, ",grpquota"); #endif } @@ -656,8 +658,7 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); - seq_printf(seq, ",data=%s", data_mode_string(sbi->s_mount_opt & - EXT3_MOUNT_DATA_FLAGS)); + seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS))); if (test_opt(sb, DATA_ERR_ABORT)) seq_puts(seq, ",data_err=abort"); @@ -751,13 +752,6 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); static const struct dquot_operations ext3_quota_operations = { - .initialize = dquot_initialize, - .drop = dquot_drop, - .alloc_space = dquot_alloc_space, - .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, - .free_inode = dquot_free_inode, - .transfer = dquot_transfer, .write_dquot = ext3_write_dquot, .acquire_dquot = ext3_acquire_dquot, .release_dquot = ext3_release_dquot, @@ -896,6 +890,63 @@ static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb) return sb_block; } +#ifdef CONFIG_QUOTA +static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + char *qname; + + if (sb_any_quota_loaded(sb) && + !sbi->s_qf_names[qtype]) { + ext3_msg(sb, KERN_ERR, + "Cannot change journaled " + "quota options when quota turned on"); + return 0; + } + qname = match_strdup(args); + if (!qname) { + ext3_msg(sb, KERN_ERR, + "Not enough memory for storing quotafile name"); + return 0; + } + if (sbi->s_qf_names[qtype] && + strcmp(sbi->s_qf_names[qtype], qname)) { + ext3_msg(sb, KERN_ERR, + "%s quota file already specified", QTYPE2NAME(qtype)); + kfree(qname); + return 0; + } + sbi->s_qf_names[qtype] = qname; + if (strchr(sbi->s_qf_names[qtype], '/')) { + ext3_msg(sb, KERN_ERR, + "quotafile must be on filesystem root"); + kfree(sbi->s_qf_names[qtype]); + sbi->s_qf_names[qtype] = NULL; + return 0; + } + set_opt(sbi->s_mount_opt, QUOTA); + return 1; +} + +static int clear_qf_name(struct super_block *sb, int qtype) { + + struct ext3_sb_info *sbi = EXT3_SB(sb); + + if (sb_any_quota_loaded(sb) && + sbi->s_qf_names[qtype]) { + ext3_msg(sb, KERN_ERR, "Cannot change journaled quota options" + " when quota turned on"); + return 0; + } + /* + * The space will be released later when all options are confirmed + * to be correct + */ + sbi->s_qf_names[qtype] = NULL; + return 1; +} +#endif + static int parse_options (char *options, struct super_block *sb, unsigned int *inum, unsigned long *journal_devnum, ext3_fsblk_t *n_blocks_count, int is_remount) @@ -906,8 +957,7 @@ static int parse_options (char *options, struct super_block *sb, int data_opt = 0; int option; #ifdef CONFIG_QUOTA - int qtype, qfmt; - char *qname; + int qfmt; #endif if (!options) @@ -1065,20 +1115,19 @@ static int parse_options (char *options, struct super_block *sb, data_opt = EXT3_MOUNT_WRITEBACK_DATA; datacheck: if (is_remount) { - if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) - == data_opt) + if (test_opt(sb, DATA_FLAGS) == data_opt) break; ext3_msg(sb, KERN_ERR, "error: cannot change " "data mode on remount. The filesystem " "is mounted in data=%s mode and you " "try to remount it in data=%s mode.", - data_mode_string(sbi->s_mount_opt & - EXT3_MOUNT_DATA_FLAGS), + data_mode_string(test_opt(sb, + DATA_FLAGS)), data_mode_string(data_opt)); return 0; } else { - sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; + clear_opt(sbi->s_mount_opt, DATA_FLAGS); sbi->s_mount_opt |= data_opt; } break; @@ -1090,62 +1139,20 @@ static int parse_options (char *options, struct super_block *sb, break; #ifdef CONFIG_QUOTA case Opt_usrjquota: - qtype = USRQUOTA; - goto set_qf_name; - case Opt_grpjquota: - qtype = GRPQUOTA; -set_qf_name: - if (sb_any_quota_loaded(sb) && - !sbi->s_qf_names[qtype]) { - ext3_msg(sb, KERN_ERR, - "error: cannot change journaled " - "quota options when quota turned on."); - return 0; - } - qname = match_strdup(&args[0]); - if (!qname) { - ext3_msg(sb, KERN_ERR, - "error: not enough memory for " - "storing quotafile name."); + if (!set_qf_name(sb, USRQUOTA, &args[0])) return 0; - } - if (sbi->s_qf_names[qtype] && - strcmp(sbi->s_qf_names[qtype], qname)) { - ext3_msg(sb, KERN_ERR, - "error: %s quota file already " - "specified.", QTYPE2NAME(qtype)); - kfree(qname); - return 0; - } - sbi->s_qf_names[qtype] = qname; - if (strchr(sbi->s_qf_names[qtype], '/')) { - ext3_msg(sb, KERN_ERR, - "error: quotafile must be on " - "filesystem root."); - kfree(sbi->s_qf_names[qtype]); - sbi->s_qf_names[qtype] = NULL; + break; + case Opt_grpjquota: + if (!set_qf_name(sb, GRPQUOTA, &args[0])) return 0; - } - set_opt(sbi->s_mount_opt, QUOTA); break; case Opt_offusrjquota: - qtype = USRQUOTA; - goto clear_qf_name; + if (!clear_qf_name(sb, USRQUOTA)) + return 0; + break; case Opt_offgrpjquota: - qtype = GRPQUOTA; -clear_qf_name: - if (sb_any_quota_loaded(sb) && - sbi->s_qf_names[qtype]) { - ext3_msg(sb, KERN_ERR, "error: cannot change " - "journaled quota options when " - "quota turned on."); + if (!clear_qf_name(sb, GRPQUOTA)) return 0; - } - /* - * The space will be released later when all options - * are confirmed to be correct - */ - sbi->s_qf_names[qtype] = NULL; break; case Opt_jqfmt_vfsold: qfmt = QFMT_VFS_OLD; @@ -1244,18 +1251,12 @@ set_qf_format: } #ifdef CONFIG_QUOTA if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { - if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) && - sbi->s_qf_names[USRQUOTA]) + if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) clear_opt(sbi->s_mount_opt, USRQUOTA); - - if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) && - sbi->s_qf_names[GRPQUOTA]) + if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) clear_opt(sbi->s_mount_opt, GRPQUOTA); - if ((sbi->s_qf_names[USRQUOTA] && - (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) || - (sbi->s_qf_names[GRPQUOTA] && - (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) { + if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { ext3_msg(sb, KERN_ERR, "error: old and new quota " "format mixing."); return 0; @@ -1478,7 +1479,7 @@ static void ext3_orphan_cleanup (struct super_block * sb, } list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - vfs_dq_init(inode); + dquot_initialize(inode); if (inode->i_nlink) { printk(KERN_DEBUG "%s: truncating inode %lu to %Ld bytes\n", @@ -1671,11 +1672,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, POSIX_ACL); #endif if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA) - sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA; + set_opt(sbi->s_mount_opt, JOURNAL_DATA); else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED) - sbi->s_mount_opt |= EXT3_MOUNT_ORDERED_DATA; + set_opt(sbi->s_mount_opt, ORDERED_DATA); else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK) - sbi->s_mount_opt |= EXT3_MOUNT_WRITEBACK_DATA; + set_opt(sbi->s_mount_opt, WRITEBACK_DATA); if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC) set_opt(sbi->s_mount_opt, ERRORS_PANIC); @@ -1694,7 +1695,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV && (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || @@ -2561,11 +2562,11 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) goto restore_opts; } - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) + if (test_opt(sb, ABORT)) ext3_abort(sb, __func__, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); es = sbi->s_es; @@ -2573,7 +2574,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || n_blocks_count > le32_to_cpu(es->s_blocks_count)) { - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) { + if (test_opt(sb, ABORT)) { err = -EROFS; goto restore_opts; } @@ -2734,7 +2735,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) * Process 1 Process 2 * ext3_create() quota_sync() * journal_start() write_dquot() - * vfs_dq_init() down(dqio_mutex) + * dquot_initialize() down(dqio_mutex) * down(dqio_mutex) journal_start() * */ @@ -2942,9 +2943,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); - int tocopy; int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL; - size_t towrite = len; struct buffer_head *bh; handle_t *handle = journal_current_handle(); @@ -2955,53 +2954,54 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, (unsigned long long)off, (unsigned long long)len); return -EIO; } + + /* + * Since we account only one data block in transaction credits, + * then it is impossible to cross a block boundary. + */ + if (sb->s_blocksize - offset < len) { + ext3_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" + " cancelled because not block aligned", + (unsigned long long)off, (unsigned long long)len); + return -EIO; + } mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); - while (towrite > 0) { - tocopy = sb->s_blocksize - offset < towrite ? - sb->s_blocksize - offset : towrite; - bh = ext3_bread(handle, inode, blk, 1, &err); - if (!bh) + bh = ext3_bread(handle, inode, blk, 1, &err); + if (!bh) + goto out; + if (journal_quota) { + err = ext3_journal_get_write_access(handle, bh); + if (err) { + brelse(bh); goto out; - if (journal_quota) { - err = ext3_journal_get_write_access(handle, bh); - if (err) { - brelse(bh); - goto out; - } - } - lock_buffer(bh); - memcpy(bh->b_data+offset, data, tocopy); - flush_dcache_page(bh->b_page); - unlock_buffer(bh); - if (journal_quota) - err = ext3_journal_dirty_metadata(handle, bh); - else { - /* Always do at least ordered writes for quotas */ - err = ext3_journal_dirty_data(handle, bh); - mark_buffer_dirty(bh); } - brelse(bh); - if (err) - goto out; - offset = 0; - towrite -= tocopy; - data += tocopy; - blk++; } + lock_buffer(bh); + memcpy(bh->b_data+offset, data, len); + flush_dcache_page(bh->b_page); + unlock_buffer(bh); + if (journal_quota) + err = ext3_journal_dirty_metadata(handle, bh); + else { + /* Always do at least ordered writes for quotas */ + err = ext3_journal_dirty_data(handle, bh); + mark_buffer_dirty(bh); + } + brelse(bh); out: - if (len == towrite) { + if (err) { mutex_unlock(&inode->i_mutex); return err; } - if (inode->i_size < off+len-towrite) { - i_size_write(inode, off+len-towrite); + if (inode->i_size < off + len) { + i_size_write(inode, off + len); EXT3_I(inode)->i_disksize = inode->i_size; } inode->i_version++; inode->i_mtime = inode->i_ctime = CURRENT_TIME; ext3_mark_inode_dirty(handle, inode); mutex_unlock(&inode->i_mutex); - return len - towrite; + return len; } #endif diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 66895ccf76c..534a94c3a93 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -274,7 +274,7 @@ ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, void *end; int error; - if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)) + if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR)) return -ENODATA; error = ext3_get_inode_loc(inode, &iloc); if (error) @@ -403,7 +403,7 @@ ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) void *end; int error; - if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)) + if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR)) return 0; error = ext3_get_inode_loc(inode, &iloc); if (error) @@ -500,7 +500,7 @@ ext3_xattr_release_block(handle_t *handle, struct inode *inode, error = ext3_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); ea_bdebug(bh, "refcount now=%d; releasing", le32_to_cpu(BHDR(bh)->h_refcount)); if (ce) @@ -775,8 +775,8 @@ inserted: else { /* The old block is released after updating the inode. */ - error = -EDQUOT; - if (vfs_dq_alloc_block(inode, 1)) + error = dquot_alloc_block(inode, 1); + if (error) goto cleanup; error = ext3_journal_get_write_access(handle, new_bh); @@ -850,7 +850,7 @@ cleanup: return error; cleanup_dquot: - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); goto cleanup; bad_block: @@ -882,7 +882,7 @@ ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i, is->s.base = is->s.first = IFIRST(header); is->s.here = is->s.first; is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; - if (EXT3_I(inode)->i_state & EXT3_STATE_XATTR) { + if (ext3_test_inode_state(inode, EXT3_STATE_XATTR)) { error = ext3_xattr_check_names(IFIRST(header), is->s.end); if (error) return error; @@ -914,10 +914,10 @@ ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, header = IHDR(inode, ext3_raw_inode(&is->iloc)); if (!IS_LAST_ENTRY(s->first)) { header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); - EXT3_I(inode)->i_state |= EXT3_STATE_XATTR; + ext3_set_inode_state(inode, EXT3_STATE_XATTR); } else { header->h_magic = cpu_to_le32(0); - EXT3_I(inode)->i_state &= ~EXT3_STATE_XATTR; + ext3_clear_inode_state(inode, EXT3_STATE_XATTR); } return 0; } @@ -967,10 +967,10 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (error) goto cleanup; - if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) { + if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) { struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc); memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); - EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW; + ext3_clear_inode_state(inode, EXT3_STATE_NEW); } error = ext3_xattr_ibody_find(inode, &i, &is); diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 22bc7435d91..d2f37a5516c 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -97,8 +97,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { - ext4_error(sb, __func__, - "Checksum bad for group %u", block_group); + ext4_error(sb, "Checksum bad for group %u", + block_group); ext4_free_blks_set(sb, gdp, 0); ext4_free_inodes_set(sb, gdp, 0); ext4_itable_unused_set(sb, gdp, 0); @@ -130,8 +130,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, * to make sure we calculate the right free blocks */ group_blocks = ext4_blocks_count(sbi->s_es) - - le32_to_cpu(sbi->s_es->s_first_data_block) - - (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1)); + ext4_group_first_block_no(sb, ngroups - 1); } else { group_blocks = EXT4_BLOCKS_PER_GROUP(sb); } @@ -189,9 +188,6 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, * when a file system is mounted (see ext4_fill_super). */ - -#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) - /** * ext4_get_group_desc() -- load group descriptor from disk * @sb: super block @@ -210,10 +206,8 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); if (block_group >= ngroups) { - ext4_error(sb, "ext4_get_group_desc", - "block_group >= groups_count - " - "block_group = %u, groups_count = %u", - block_group, ngroups); + ext4_error(sb, "block_group >= groups_count - block_group = %u," + " groups_count = %u", block_group, ngroups); return NULL; } @@ -221,8 +215,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); if (!sbi->s_group_desc[group_desc]) { - ext4_error(sb, "ext4_get_group_desc", - "Group descriptor not loaded - " + ext4_error(sb, "Group descriptor not loaded - " "block_group = %u, group_desc = %u, desc = %u", block_group, group_desc, offset); return NULL; @@ -282,9 +275,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb, return 1; err_out: - ext4_error(sb, __func__, - "Invalid block bitmap - " - "block_group = %d, block = %llu", + ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu", block_group, bitmap_blk); return 0; } @@ -311,8 +302,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) bitmap_blk = ext4_block_bitmap(sb, desc); bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { - ext4_error(sb, __func__, - "Cannot read block bitmap - " + ext4_error(sb, "Cannot read block bitmap - " "block_group = %u, block_bitmap = %llu", block_group, bitmap_blk); return NULL; @@ -354,8 +344,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) set_bitmap_uptodate(bh); if (bh_submit_read(bh) < 0) { put_bh(bh); - ext4_error(sb, __func__, - "Cannot read block bitmap - " + ext4_error(sb, "Cannot read block bitmap - " "block_group = %u, block_bitmap = %llu", block_group, bitmap_blk); return NULL; @@ -419,8 +408,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || in_range(block + count - 1, ext4_inode_table(sb, desc), sbi->s_itb_per_group)) { - ext4_error(sb, __func__, - "Adding blocks in system zones - " + ext4_error(sb, "Adding blocks in system zones - " "Block = %llu, count = %lu", block, count); goto error_return; @@ -453,8 +441,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, BUFFER_TRACE(bitmap_bh, "clear bit"); if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), bit + i, bitmap_bh->b_data)) { - ext4_error(sb, __func__, - "bit already cleared for block %llu", + ext4_error(sb, "bit already cleared for block %llu", (ext4_fsblk_t)(block + i)); BUFFER_TRACE(bitmap_bh, "bit already cleared"); } else { diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index a60ab9aad57..983f0e12749 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -205,14 +205,14 @@ void ext4_release_system_zone(struct super_block *sb) entry = rb_entry(n, struct ext4_system_zone, node); kmem_cache_free(ext4_system_zone_cachep, entry); if (!parent) - EXT4_SB(sb)->system_blks.rb_node = NULL; + EXT4_SB(sb)->system_blks = RB_ROOT; else if (parent->rb_left == n) parent->rb_left = NULL; else if (parent->rb_right == n) parent->rb_right = NULL; n = parent; } - EXT4_SB(sb)->system_blks.rb_node = NULL; + EXT4_SB(sb)->system_blks = RB_ROOT; } /* diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 9dc93168e26..86cb6d86a04 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -83,10 +83,12 @@ int ext4_check_dir_entry(const char *function, struct inode *dir, error_msg = "inode out of bounds"; if (error_msg != NULL) - ext4_error(dir->i_sb, function, - "bad entry in directory #%lu: %s - " - "offset=%u, inode=%u, rec_len=%d, name_len=%d", - dir->i_ino, error_msg, offset, + __ext4_error(dir->i_sb, function, + "bad entry in directory #%lu: %s - block=%llu" + "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", + dir->i_ino, error_msg, + (unsigned long long) bh->b_blocknr, + (unsigned) (offset%bh->b_size), offset, le32_to_cpu(de->inode), rlen, de->name_len); return error_msg == NULL ? 1 : 0; @@ -150,7 +152,7 @@ static int ext4_readdir(struct file *filp, */ if (!bh) { if (!dir_has_error) { - ext4_error(sb, __func__, "directory #%lu " + ext4_error(sb, "directory #%lu " "contains a hole at offset %Lu", inode->i_ino, (unsigned long long) filp->f_pos); @@ -303,7 +305,7 @@ static void free_rb_tree_fname(struct rb_root *root) kfree(old); } if (!parent) - root->rb_node = NULL; + *root = RB_ROOT; else if (parent->rb_left == n) parent->rb_left = NULL; else if (parent->rb_right == n) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4cedc91ec59..bf938cf7c5f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -53,6 +53,12 @@ #define ext4_debug(f, a...) do {} while (0) #endif +#define EXT4_ERROR_INODE(inode, fmt, a...) \ + ext4_error_inode(__func__, (inode), (fmt), ## a); + +#define EXT4_ERROR_FILE(file, fmt, a...) \ + ext4_error_file(__func__, (file), (fmt), ## a); + /* data type for block offset of block group */ typedef int ext4_grpblk_t; @@ -133,14 +139,14 @@ struct mpage_da_data { int pages_written; int retval; }; -#define DIO_AIO_UNWRITTEN 0x1 +#define EXT4_IO_UNWRITTEN 0x1 typedef struct ext4_io_end { struct list_head list; /* per-file finished AIO list */ struct inode *inode; /* file being written to */ unsigned int flag; /* unwritten or not */ - int error; /* I/O error code */ - ext4_lblk_t offset; /* offset in the file */ - size_t size; /* size of the extent */ + struct page *page; /* page struct for buffer write */ + loff_t offset; /* offset in the file */ + ssize_t size; /* size of the extent */ struct work_struct work; /* data work queue */ } ext4_io_end_t; @@ -284,10 +290,12 @@ struct flex_groups { #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ +#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ -#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ +#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ +#define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */ /* Flags that should be inherited by new inodes from their parent. */ #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ @@ -313,17 +321,6 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) return flags & EXT4_OTHER_FLMASK; } -/* - * Inode dynamic state flags - */ -#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ -#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ -#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ -#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ -#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ -#define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */ -#define EXT4_STATE_DIO_UNWRITTEN 0x00000040 /* need convert on dio done*/ - /* Used to pass group descriptor data when online resize is done */ struct ext4_new_group_input { __u32 group; /* Group number for this data */ @@ -364,19 +361,20 @@ struct ext4_new_group_data { /* caller is from the direct IO path, request to creation of an unitialized extents if not allocated, split the uninitialized extent if blocks has been preallocated already*/ -#define EXT4_GET_BLOCKS_DIO 0x0008 +#define EXT4_GET_BLOCKS_PRE_IO 0x0008 #define EXT4_GET_BLOCKS_CONVERT 0x0010 -#define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ +#define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\ + EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) + /* Convert extent to initialized after IO complete */ +#define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) - /* Convert extent to initialized after direct IO complete */ -#define EXT4_GET_BLOCKS_DIO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ - EXT4_GET_BLOCKS_DIO_CREATE_EXT) /* * Flags used by ext4_free_blocks */ #define EXT4_FREE_BLOCKS_METADATA 0x0001 #define EXT4_FREE_BLOCKS_FORGET 0x0002 +#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 /* * ioctl commands @@ -630,7 +628,7 @@ struct ext4_inode_info { * near to their parent directory's inode. */ ext4_group_t i_block_group; - __u32 i_state; /* Dynamic state flags for ext4 */ + unsigned long i_state_flags; /* Dynamic state flags */ ext4_lblk_t i_dir_start_lookup; #ifdef CONFIG_EXT4_FS_XATTR @@ -708,8 +706,9 @@ struct ext4_inode_info { qsize_t i_reserved_quota; #endif - /* completed async DIOs that might need unwritten extents handling */ - struct list_head i_aio_dio_complete_list; + /* completed IOs that might need unwritten extents handling */ + struct list_head i_completed_io_list; + spinlock_t i_completed_io_lock; /* current io_end structure for async DIO write*/ ext4_io_end_t *cur_aio_dio; @@ -760,6 +759,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ @@ -1050,6 +1050,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) (ino >= EXT4_FIRST_INO(sb) && ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } + +/* + * Inode dynamic state flags + */ +enum { + EXT4_STATE_JDATA, /* journaled data exists */ + EXT4_STATE_NEW, /* inode is newly created */ + EXT4_STATE_XATTR, /* has in-inode xattrs */ + EXT4_STATE_NO_EXPAND, /* No space for expansion */ + EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ + EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ + EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ +}; + +static inline int ext4_test_inode_state(struct inode *inode, int bit) +{ + return test_bit(bit, &EXT4_I(inode)->i_state_flags); +} + +static inline void ext4_set_inode_state(struct inode *inode, int bit) +{ + set_bit(bit, &EXT4_I(inode)->i_state_flags); +} + +static inline void ext4_clear_inode_state(struct inode *inode, int bit) +{ + clear_bit(bit, &EXT4_I(inode)->i_state_flags); +} #else /* Assume that user mode programs are passing in an ext4fs superblock, not * a kernel struct super_block. This will allow us to call the feature-test @@ -1126,6 +1154,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 #define EXT4_FEATURE_INCOMPAT_MMP 0x0100 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 +#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ +#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ @@ -1416,7 +1446,7 @@ int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); extern struct inode *ext4_iget(struct super_block *, unsigned long); -extern int ext4_write_inode(struct inode *, int); +extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct dentry *, struct iattr *); extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); @@ -1439,7 +1469,7 @@ extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); -extern int flush_aio_dio_completed_IO(struct inode *inode); +extern int flush_completed_IO(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); /* ioctl.c */ @@ -1465,13 +1495,20 @@ extern int ext4_group_extend(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ -extern void ext4_error(struct super_block *, const char *, const char *, ...) +extern void __ext4_error(struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +#define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message) +extern void ext4_error_inode(const char *, struct inode *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext4_error_file(const char *, struct file *, const char *, ...) __attribute__ ((format (printf, 3, 4))); extern void __ext4_std_error(struct super_block *, const char *, int); extern void ext4_abort(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void ext4_warning(struct super_block *, const char *, const char *, ...) +extern void __ext4_warning(struct super_block *, const char *, + const char *, ...) __attribute__ ((format (printf, 3, 4))); +#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, ## message) extern void ext4_msg(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, @@ -1744,7 +1781,7 @@ extern void ext4_ext_release(struct super_block *); extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len); extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, - loff_t len); + ssize_t len); extern int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, unsigned int max_blocks, struct buffer_head *bh, int flags); @@ -1756,6 +1793,15 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 len, __u64 *moved_len); +/* BH_Uninit flag: blocks are allocated but uninitialized on disk */ +enum ext4_state_bits { + BH_Uninit /* blocks are allocated but uninitialized on disk */ + = BH_JBDPrivateStart, +}; + +BUFFER_FNS(Uninit, uninit) +TAS_BUFFER_FNS(Uninit, uninit) + /* * Add new method to test wether block and inode bitmaps are properly * initialized. With uninit_bg reading the block from disk is not enough @@ -1773,6 +1819,8 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); } +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + #endif /* __KERNEL__ */ #endif /* _EXT4_H */ diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index b57e5c711b6..53d2764d71c 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -125,14 +125,14 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, ext4_journal_abort_handle(where, __func__, bh, handle, err); } else { - if (inode && bh) + if (inode) mark_buffer_dirty_inode(bh, inode); else mark_buffer_dirty(bh); if (inode && inode_needs_sync(inode)) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, "IO error syncing inode, " "inode=%lu, block=%llu", inode->i_ino, diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 05eca817d70..b79ad512646 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -304,4 +304,28 @@ static inline int ext4_should_writeback_data(struct inode *inode) return 0; } +/* + * This function controls whether or not we should try to go down the + * dioread_nolock code paths, which makes it safe to avoid taking + * i_mutex for direct I/O reads. This only works for extent-based + * files, and it doesn't work for nobh or if data journaling is + * enabled, since the dioread_nolock code uses b_private to pass + * information back to the I/O completion handler, and this conflicts + * with the jbd's use of b_private. + */ +static inline int ext4_should_dioread_nolock(struct inode *inode) +{ + if (!test_opt(inode->i_sb, DIOREAD_NOLOCK)) + return 0; + if (test_opt(inode->i_sb, NOBH)) + return 0; + if (!S_ISREG(inode->i_mode)) + return 0; + if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + return 0; + if (ext4_should_journal_data(inode)) + return 0; + return 1; +} + #endif /* _EXT4_JBD2_H */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 765a4826b11..94c8ee81f5e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -195,8 +195,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, if (S_ISREG(inode->i_mode)) block_group++; } - bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); + bg_start = ext4_group_first_block_no(inode->i_sb, block_group); last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; /* @@ -440,7 +439,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode, return 0; corrupted: - ext4_error(inode->i_sb, function, + __ext4_error(inode->i_sb, function, "bad header/extent in inode #%lu: %s - magic %x, " "entries %u, max %u(%u), depth %u(%u)", inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), @@ -703,7 +702,12 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, } eh = ext_block_hdr(bh); ppos++; - BUG_ON(ppos > depth); + if (unlikely(ppos > depth)) { + put_bh(bh); + EXT4_ERROR_INODE(inode, + "ppos %d > depth %d", ppos, depth); + goto err; + } path[ppos].p_bh = bh; path[ppos].p_hdr = eh; i--; @@ -749,7 +753,12 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode, if (err) return err; - BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block)); + if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) { + EXT4_ERROR_INODE(inode, + "logical %d == ei_block %d!", + logical, le32_to_cpu(curp->p_idx->ei_block)); + return -EIO; + } len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; if (logical > le32_to_cpu(curp->p_idx->ei_block)) { /* insert after */ @@ -779,9 +788,17 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode, ext4_idx_store_pblock(ix, ptr); le16_add_cpu(&curp->p_hdr->eh_entries, 1); - BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) - > le16_to_cpu(curp->p_hdr->eh_max)); - BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr)); + if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) + > le16_to_cpu(curp->p_hdr->eh_max))) { + EXT4_ERROR_INODE(inode, + "logical %d == ei_block %d!", + logical, le32_to_cpu(curp->p_idx->ei_block)); + return -EIO; + } + if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { + EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); + return -EIO; + } err = ext4_ext_dirty(handle, inode, curp); ext4_std_error(inode->i_sb, err); @@ -819,7 +836,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, /* if current leaf will be split, then we should use * border from split point */ - BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr)); + if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) { + EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!"); + return -EIO; + } if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { border = path[depth].p_ext[1].ee_block; ext_debug("leaf will be split." @@ -860,7 +880,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, /* initialize new leaf */ newblock = ablocks[--a]; - BUG_ON(newblock == 0); + if (unlikely(newblock == 0)) { + EXT4_ERROR_INODE(inode, "newblock == 0!"); + err = -EIO; + goto cleanup; + } bh = sb_getblk(inode->i_sb, newblock); if (!bh) { err = -EIO; @@ -880,7 +904,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ex = EXT_FIRST_EXTENT(neh); /* move remainder of path[depth] to the new leaf */ - BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max); + if (unlikely(path[depth].p_hdr->eh_entries != + path[depth].p_hdr->eh_max)) { + EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!", + path[depth].p_hdr->eh_entries, + path[depth].p_hdr->eh_max); + err = -EIO; + goto cleanup; + } /* start copy from next extent */ /* TODO: we could do it by single memmove */ m = 0; @@ -927,7 +958,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, /* create intermediate indexes */ k = depth - at - 1; - BUG_ON(k < 0); + if (unlikely(k < 0)) { + EXT4_ERROR_INODE(inode, "k %d < 0!", k); + err = -EIO; + goto cleanup; + } if (k) ext_debug("create %d intermediate indices\n", k); /* insert new index into current index block */ @@ -964,8 +999,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, EXT_MAX_INDEX(path[i].p_hdr)); - BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) != - EXT_LAST_INDEX(path[i].p_hdr)); + if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != + EXT_LAST_INDEX(path[i].p_hdr))) { + EXT4_ERROR_INODE(inode, + "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!", + le32_to_cpu(path[i].p_ext->ee_block)); + err = -EIO; + goto cleanup; + } while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { ext_debug("%d: move %d:%llu in new index %llu\n", i, le32_to_cpu(path[i].p_idx->ei_block), @@ -1203,7 +1244,10 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *ex; int depth, ee_len; - BUG_ON(path == NULL); + if (unlikely(path == NULL)) { + EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); + return -EIO; + } depth = path->p_depth; *phys = 0; @@ -1217,15 +1261,33 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, ex = path[depth].p_ext; ee_len = ext4_ext_get_actual_len(ex); if (*logical < le32_to_cpu(ex->ee_block)) { - BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); + if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { + EXT4_ERROR_INODE(inode, + "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!", + *logical, le32_to_cpu(ex->ee_block)); + return -EIO; + } while (--depth >= 0) { ix = path[depth].p_idx; - BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); + if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { + EXT4_ERROR_INODE(inode, + "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", + ix != NULL ? ix->ei_block : 0, + EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? + EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0, + depth); + return -EIO; + } } return 0; } - BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); + if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { + EXT4_ERROR_INODE(inode, + "logical %d < ee_block %d + ee_len %d!", + *logical, le32_to_cpu(ex->ee_block), ee_len); + return -EIO; + } *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; *phys = ext_pblock(ex) + ee_len - 1; @@ -1251,7 +1313,10 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, int depth; /* Note, NOT eh_depth; depth from top of tree */ int ee_len; - BUG_ON(path == NULL); + if (unlikely(path == NULL)) { + EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); + return -EIO; + } depth = path->p_depth; *phys = 0; @@ -1265,17 +1330,32 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, ex = path[depth].p_ext; ee_len = ext4_ext_get_actual_len(ex); if (*logical < le32_to_cpu(ex->ee_block)) { - BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); + if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { + EXT4_ERROR_INODE(inode, + "first_extent(path[%d].p_hdr) != ex", + depth); + return -EIO; + } while (--depth >= 0) { ix = path[depth].p_idx; - BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); + if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { + EXT4_ERROR_INODE(inode, + "ix != EXT_FIRST_INDEX *logical %d!", + *logical); + return -EIO; + } } *logical = le32_to_cpu(ex->ee_block); *phys = ext_pblock(ex); return 0; } - BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); + if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { + EXT4_ERROR_INODE(inode, + "logical %d < ee_block %d + ee_len %d!", + *logical, le32_to_cpu(ex->ee_block), ee_len); + return -EIO; + } if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { /* next allocated block in this leaf */ @@ -1414,8 +1494,12 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, eh = path[depth].p_hdr; ex = path[depth].p_ext; - BUG_ON(ex == NULL); - BUG_ON(eh == NULL); + + if (unlikely(ex == NULL || eh == NULL)) { + EXT4_ERROR_INODE(inode, + "ex %p == NULL or eh %p == NULL", ex, eh); + return -EIO; + } if (depth == 0) { /* there is no tree at all */ @@ -1538,8 +1622,9 @@ int ext4_ext_try_to_merge(struct inode *inode, merge_done = 1; WARN_ON(eh->eh_entries == 0); if (!eh->eh_entries) - ext4_error(inode->i_sb, "ext4_ext_try_to_merge", - "inode#%lu, eh->eh_entries = 0!", inode->i_ino); + ext4_error(inode->i_sb, + "inode#%lu, eh->eh_entries = 0!", + inode->i_ino); } return merge_done; @@ -1612,13 +1697,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, ext4_lblk_t next; unsigned uninitialized = 0; - BUG_ON(ext4_ext_get_actual_len(newext) == 0); + if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { + EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); + return -EIO; + } depth = ext_depth(inode); ex = path[depth].p_ext; - BUG_ON(path[depth].p_hdr == NULL); + if (unlikely(path[depth].p_hdr == NULL)) { + EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); + return -EIO; + } /* try to insert block into found extent and return */ - if (ex && (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT) + if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) && ext4_can_extents_be_merged(inode, ex, newext)) { ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", ext4_ext_is_uninitialized(newext), @@ -1739,7 +1830,7 @@ has_space: merge: /* try to merge extents to the right */ - if (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT) + if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) ext4_ext_try_to_merge(inode, path, nearex); /* try to merge extents to the left */ @@ -1787,7 +1878,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, } depth = ext_depth(inode); - BUG_ON(path[depth].p_hdr == NULL); + if (unlikely(path[depth].p_hdr == NULL)) { + EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); + err = -EIO; + break; + } ex = path[depth].p_ext; next = ext4_ext_next_allocated_block(path); @@ -1838,7 +1933,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, cbex.ec_type = EXT4_EXT_CACHE_EXTENT; } - BUG_ON(cbex.ec_len == 0); + if (unlikely(cbex.ec_len == 0)) { + EXT4_ERROR_INODE(inode, "cbex.ec_len == 0"); + err = -EIO; + break; + } err = func(inode, path, &cbex, ex, cbdata); ext4_ext_drop_refs(path); @@ -1952,7 +2051,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && cex->ec_type != EXT4_EXT_CACHE_EXTENT); - if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { + if (in_range(block, cex->ec_block, cex->ec_len)) { ex->ee_block = cpu_to_le32(cex->ec_block); ext4_ext_store_pblock(ex, cex->ec_start); ex->ee_len = cpu_to_le16(cex->ec_len); @@ -1981,7 +2080,10 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, /* free index block */ path--; leaf = idx_pblock(path->p_idx); - BUG_ON(path->p_hdr->eh_entries == 0); + if (unlikely(path->p_hdr->eh_entries == 0)) { + EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); + return -EIO; + } err = ext4_ext_get_access(handle, inode, path); if (err) return err; @@ -2119,8 +2221,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (!path[depth].p_hdr) path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); eh = path[depth].p_hdr; - BUG_ON(eh == NULL); - + if (unlikely(path[depth].p_hdr == NULL)) { + EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); + return -EIO; + } /* find where to start removing */ ex = EXT_LAST_EXTENT(eh); @@ -2983,7 +3087,7 @@ fix_extent_len: ext4_ext_dirty(handle, inode, path + depth); return err; } -static int ext4_convert_unwritten_extents_dio(handle_t *handle, +static int ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { @@ -3063,8 +3167,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, flags, allocated); ext4_ext_show_leaf(inode, path); - /* DIO get_block() before submit the IO, split the extent */ - if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) { + /* get_block() before submit the IO, split the extent */ + if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { ret = ext4_split_unwritten_extents(handle, inode, path, iblock, max_blocks, flags); @@ -3074,14 +3178,16 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, * completed */ if (io) - io->flag = DIO_AIO_UNWRITTEN; + io->flag = EXT4_IO_UNWRITTEN; else - EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN; + ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); + if (ext4_should_dioread_nolock(inode)) + set_buffer_uninit(bh_result); goto out; } - /* async DIO end_io complete, convert the filled extent to written */ - if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { - ret = ext4_convert_unwritten_extents_dio(handle, inode, + /* IO end_io complete, convert the filled extent to written */ + if ((flags & EXT4_GET_BLOCKS_CONVERT)) { + ret = ext4_convert_unwritten_extents_endio(handle, inode, path); if (ret >= 0) ext4_update_inode_fsync_trans(handle, inode, 1); @@ -3185,7 +3291,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, { struct ext4_ext_path *path = NULL; struct ext4_extent_header *eh; - struct ext4_extent newex, *ex; + struct ext4_extent newex, *ex, *last_ex; ext4_fsblk_t newblock; int err = 0, depth, ret, cache_type; unsigned int allocated = 0; @@ -3237,10 +3343,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * this situation is possible, though, _during_ tree modification; * this is why assert can't be put in ext4_ext_find_extent() */ - if (path[depth].p_ext == NULL && depth != 0) { - ext4_error(inode->i_sb, __func__, "bad extent address " - "inode: %lu, iblock: %d, depth: %d", - inode->i_ino, iblock, depth); + if (unlikely(path[depth].p_ext == NULL && depth != 0)) { + EXT4_ERROR_INODE(inode, "bad extent address " + "iblock: %d, depth: %d pblock %lld", + iblock, depth, path[depth].p_block); err = -EIO; goto out2; } @@ -3258,7 +3364,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, */ ee_len = ext4_ext_get_actual_len(ex); /* if found extent covers block, simply return it */ - if (iblock >= ee_block && iblock < ee_block + ee_len) { + if (in_range(iblock, ee_block, ee_len)) { newblock = iblock - ee_block + ee_start; /* number of remaining blocks in the extent */ allocated = ee_len - (iblock - ee_block); @@ -3350,21 +3456,35 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ ext4_ext_mark_uninitialized(&newex); /* - * io_end structure was created for every async - * direct IO write to the middle of the file. - * To avoid unecessary convertion for every aio dio rewrite - * to the mid of file, here we flag the IO that is really - * need the convertion. + * io_end structure was created for every IO write to an + * uninitialized extent. To avoid unecessary conversion, + * here we flag the IO that really needs the conversion. * For non asycn direct IO case, flag the inode state * that we need to perform convertion when IO is done. */ - if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) { + if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { if (io) - io->flag = DIO_AIO_UNWRITTEN; + io->flag = EXT4_IO_UNWRITTEN; else - EXT4_I(inode)->i_state |= - EXT4_STATE_DIO_UNWRITTEN;; + ext4_set_inode_state(inode, + EXT4_STATE_DIO_UNWRITTEN); + } + if (ext4_should_dioread_nolock(inode)) + set_buffer_uninit(bh_result); + } + + if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { + if (unlikely(!eh->eh_entries)) { + EXT4_ERROR_INODE(inode, + "eh->eh_entries == 0 ee_block %d", + ex->ee_block); + err = -EIO; + goto out2; } + last_ex = EXT_LAST_EXTENT(eh); + if (iblock + ar.len > le32_to_cpu(last_ex->ee_block) + + ext4_ext_get_actual_len(last_ex)) + EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; } err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err) { @@ -3499,6 +3619,13 @@ static void ext4_falloc_update_inode(struct inode *inode, i_size_write(inode, new_size); if (new_size > EXT4_I(inode)->i_disksize) ext4_update_i_disksize(inode, new_size); + } else { + /* + * Mark that we allocate beyond EOF so the subsequent truncate + * can proceed even if the new size is the same as i_size. + */ + if (new_size > i_size_read(inode)) + EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL; } } @@ -3603,7 +3730,7 @@ retry: * Returns 0 on success. */ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, - loff_t len) + ssize_t len) { handle_t *handle; ext4_lblk_t block; @@ -3635,7 +3762,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, map_bh.b_state = 0; ret = ext4_get_blocks(handle, inode, block, max_blocks, &map_bh, - EXT4_GET_BLOCKS_DIO_CONVERT_EXT); + EXT4_GET_BLOCKS_IO_CONVERT_EXT); if (ret <= 0) { WARN_ON(ret <= 0); printk(KERN_ERR "%s: ext4_ext_get_blocks " @@ -3739,7 +3866,7 @@ static int ext4_xattr_fiemap(struct inode *inode, int error = 0; /* in-inode? */ - if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { + if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { struct ext4_iloc iloc; int offset; /* offset of xattr in inode */ @@ -3767,7 +3894,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) { ext4_lblk_t start_blk; - ext4_lblk_t len_blks; int error = 0; /* fallback to generic here if not in extents fmt */ @@ -3781,8 +3907,14 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { error = ext4_xattr_fiemap(inode, fieinfo); } else { + ext4_lblk_t len_blks; + __u64 last_blk; + start_blk = start >> inode->i_sb->s_blocksize_bits; - len_blks = len >> inode->i_sb->s_blocksize_bits; + last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; + if (last_blk >= EXT_MAX_BLOCK) + last_blk = EXT_MAX_BLOCK-1; + len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; /* * Walk the extent tree gathering extent information. diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 56eee3d796c..d0776e410f3 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -23,6 +23,7 @@ #include <linux/jbd2.h> #include <linux/mount.h> #include <linux/path.h> +#include <linux/quotaops.h> #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" @@ -35,9 +36,9 @@ */ static int ext4_release_file(struct inode *inode, struct file *filp) { - if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) { + if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) { ext4_alloc_da_blocks(inode); - EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE; + ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); } /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && @@ -125,7 +126,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) sb->s_dirt = 1; } } - return generic_file_open(inode, filp); + return dquot_file_open(inode, filp); } const struct file_operations ext4_file_operations = { diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 98bd140aad0..0d0c3239c1c 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -63,7 +63,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) if (inode->i_sb->s_flags & MS_RDONLY) return 0; - ret = flush_aio_dio_completed_IO(inode); + ret = flush_completed_IO(inode); if (ret < 0) return ret; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f3624ead4f6..361c0b9962a 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -76,8 +76,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, /* If checksum is bad mark all blocks and inodes use to prevent * allocation, essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { - ext4_error(sb, __func__, "Checksum bad for group %u", - block_group); + ext4_error(sb, "Checksum bad for group %u", block_group); ext4_free_blks_set(sb, gdp, 0); ext4_free_inodes_set(sb, gdp, 0); ext4_itable_unused_set(sb, gdp, 0); @@ -111,8 +110,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) bitmap_blk = ext4_inode_bitmap(sb, desc); bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { - ext4_error(sb, __func__, - "Cannot read inode bitmap - " + ext4_error(sb, "Cannot read inode bitmap - " "block_group = %u, inode_bitmap = %llu", block_group, bitmap_blk); return NULL; @@ -153,8 +151,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) set_bitmap_uptodate(bh); if (bh_submit_read(bh) < 0) { put_bh(bh); - ext4_error(sb, __func__, - "Cannot read inode bitmap - " + ext4_error(sb, "Cannot read inode bitmap - " "block_group = %u, inode_bitmap = %llu", block_group, bitmap_blk); return NULL; @@ -217,10 +214,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) * Note: we must free any quota before locking the superblock, * as writing the quota to disk may need the lock as well. */ - vfs_dq_init(inode); + dquot_initialize(inode); ext4_xattr_delete_inode(handle, inode); - vfs_dq_free_inode(inode); - vfs_dq_drop(inode); + dquot_free_inode(inode); + dquot_drop(inode); is_directory = S_ISDIR(inode->i_mode); @@ -229,8 +226,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) es = EXT4_SB(sb)->s_es; if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext4_error(sb, "ext4_free_inode", - "reserved or nonexistent inode %lu", ino); + ext4_error(sb, "reserved or nonexistent inode %lu", ino); goto error_return; } block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); @@ -248,8 +244,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), bit, bitmap_bh->b_data); if (!cleared) - ext4_error(sb, "ext4_free_inode", - "bit already cleared for inode %lu", ino); + ext4_error(sb, "bit already cleared for inode %lu", ino); else { gdp = ext4_get_group_desc(sb, block_group, &bh2); @@ -736,8 +731,7 @@ static int ext4_claim_inode(struct super_block *sb, if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || ino > EXT4_INODES_PER_GROUP(sb)) { ext4_unlock_group(sb, group); - ext4_error(sb, __func__, - "reserved inode or inode > inodes count - " + ext4_error(sb, "reserved inode or inode > inodes count - " "block_group = %u, inode=%lu", group, ino + group * EXT4_INODES_PER_GROUP(sb)); return 1; @@ -904,7 +898,7 @@ repeat_in_this_group: BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, - inode, + NULL, inode_bitmap_bh); if (err) goto fail; @@ -1029,15 +1023,16 @@ got: inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); - ei->i_state = EXT4_STATE_NEW; + ei->i_state_flags = 0; + ext4_set_inode_state(inode, EXT4_STATE_NEW); ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; ret = inode; - if (vfs_dq_alloc_inode(inode)) { - err = -EDQUOT; + dquot_initialize(inode); + err = dquot_alloc_inode(inode); + if (err) goto fail_drop; - } err = ext4_init_acl(handle, inode, dir); if (err) @@ -1074,10 +1069,10 @@ really_out: return ret; fail_free_drop: - vfs_dq_free_inode(inode); + dquot_free_inode(inode); fail_drop: - vfs_dq_drop(inode); + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; unlock_new_inode(inode); @@ -1098,8 +1093,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { - ext4_warning(sb, __func__, - "bad orphan ino %lu! e2fsck was run?", ino); + ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino); goto error; } @@ -1107,8 +1101,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); bitmap_bh = ext4_read_inode_bitmap(sb, block_group); if (!bitmap_bh) { - ext4_warning(sb, __func__, - "inode bitmap error for orphan %lu", ino); + ext4_warning(sb, "inode bitmap error for orphan %lu", ino); goto error; } @@ -1140,8 +1133,7 @@ iget_failed: err = PTR_ERR(inode); inode = NULL; bad_orphan: - ext4_warning(sb, __func__, - "bad orphan inode %lu! e2fsck was run?", ino); + ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino); printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", bit, (unsigned long long)bitmap_bh->b_blocknr, ext4_test_bit(bit, bitmap_bh->b_data)); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e11952404e0..986120f3006 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -38,6 +38,7 @@ #include <linux/uio.h> #include <linux/bio.h> #include <linux/workqueue.h> +#include <linux/kernel.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -170,6 +171,9 @@ void ext4_delete_inode(struct inode *inode) handle_t *handle; int err; + if (!is_bad_inode(inode)) + dquot_initialize(inode); + if (ext4_should_order_data(inode)) ext4_begin_ordered_truncate(inode, 0); truncate_inode_pages(&inode->i_data, 0); @@ -194,7 +198,7 @@ void ext4_delete_inode(struct inode *inode) inode->i_size = 0; err = ext4_mark_inode_dirty(handle, inode); if (err) { - ext4_warning(inode->i_sb, __func__, + ext4_warning(inode->i_sb, "couldn't mark inode dirty (err %d)", err); goto stop_handle; } @@ -212,7 +216,7 @@ void ext4_delete_inode(struct inode *inode) if (err > 0) err = ext4_journal_restart(handle, 3); if (err != 0) { - ext4_warning(inode->i_sb, __func__, + ext4_warning(inode->i_sb, "couldn't extend journal (err %d)", err); stop_handle: ext4_journal_stop(handle); @@ -323,8 +327,7 @@ static int ext4_block_to_path(struct inode *inode, offsets[n++] = i_block & (ptrs - 1); final = ptrs; } else { - ext4_warning(inode->i_sb, "ext4_block_to_path", - "block %lu > max in inode %lu", + ext4_warning(inode->i_sb, "block %lu > max in inode %lu", i_block + direct_blocks + indirect_blocks + double_blocks, inode->i_ino); } @@ -344,7 +347,7 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, if (blk && unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), blk, 1))) { - ext4_error(inode->i_sb, function, + __ext4_error(inode->i_sb, function, "invalid block reference %u " "in inode #%lu", blk, inode->i_ino); return -EIO; @@ -607,7 +610,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, if (*err) goto failed_out; - BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); + if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { + EXT4_ERROR_INODE(inode, + "current_block %llu + count %lu > %d!", + current_block, count, + EXT4_MAX_BLOCK_FILE_PHYS); + *err = -EIO; + goto failed_out; + } target -= count; /* allocate blocks for indirect blocks */ @@ -643,7 +653,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, ar.flags = EXT4_MB_HINT_DATA; current_block = ext4_mb_new_blocks(handle, &ar, err); - BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); + if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { + EXT4_ERROR_INODE(inode, + "current_block %llu + ar.len %d > %d!", + current_block, ar.len, + EXT4_MAX_BLOCK_FILE_PHYS); + *err = -EIO; + goto failed_out; + } if (*err && (target == blks)) { /* @@ -1061,6 +1078,7 @@ void ext4_da_update_reserve_space(struct inode *inode, int mdb_free = 0, allocated_meta_blocks = 0; spin_lock(&ei->i_block_reservation_lock); + trace_ext4_da_update_reserve_space(inode, used); if (unlikely(used > ei->i_reserved_data_blocks)) { ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " "with only %d reserved data blocks\n", @@ -1093,9 +1111,9 @@ void ext4_da_update_reserve_space(struct inode *inode, /* Update quota subsystem */ if (quota_claim) { - vfs_dq_claim_block(inode, used); + dquot_claim_block(inode, used); if (mdb_free) - vfs_dq_release_reservation_block(inode, mdb_free); + dquot_release_reservation_block(inode, mdb_free); } else { /* * We did fallocate with an offset that is already delayed @@ -1106,8 +1124,8 @@ void ext4_da_update_reserve_space(struct inode *inode, * that */ if (allocated_meta_blocks) - vfs_dq_claim_block(inode, allocated_meta_blocks); - vfs_dq_release_reservation_block(inode, mdb_free + used); + dquot_claim_block(inode, allocated_meta_blocks); + dquot_release_reservation_block(inode, mdb_free + used); } /* @@ -1124,7 +1142,7 @@ static int check_block_validity(struct inode *inode, const char *msg, sector_t logical, sector_t phys, int len) { if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { - ext4_error(inode->i_sb, msg, + __ext4_error(inode->i_sb, msg, "inode #%lu logical block %llu mapped to %llu " "(size %d)", inode->i_ino, (unsigned long long) logical, @@ -1306,7 +1324,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, * i_data's format changing. Force the migrate * to fail by clearing migrate flags */ - EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; + ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); } /* @@ -1534,6 +1552,8 @@ static void ext4_truncate_failed_write(struct inode *inode) ext4_truncate(inode); } +static int ext4_get_block_write(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create); static int ext4_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -1575,8 +1595,12 @@ retry: } *pagep = page; - ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext4_get_block); + if (ext4_should_dioread_nolock(inode)) + ret = block_write_begin(file, mapping, pos, len, flags, pagep, + fsdata, ext4_get_block_write); + else + ret = block_write_begin(file, mapping, pos, len, flags, pagep, + fsdata, ext4_get_block); if (!ret && ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), @@ -1793,7 +1817,7 @@ static int ext4_journalled_write_end(struct file *file, new_i_size = pos + copied; if (new_i_size > inode->i_size) i_size_write(inode, pos+copied); - EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; + ext4_set_inode_state(inode, EXT4_STATE_JDATA); if (new_i_size > EXT4_I(inode)->i_disksize) { ext4_update_i_disksize(inode, new_i_size); ret2 = ext4_mark_inode_dirty(handle, inode); @@ -1836,6 +1860,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); unsigned long md_needed, md_reserved; + int ret; /* * recalculate the amount of metadata blocks to reserve @@ -1846,6 +1871,7 @@ repeat: spin_lock(&ei->i_block_reservation_lock); md_reserved = ei->i_reserved_meta_blocks; md_needed = ext4_calc_metadata_amount(inode, lblock); + trace_ext4_da_reserve_space(inode, md_needed); spin_unlock(&ei->i_block_reservation_lock); /* @@ -1853,11 +1879,12 @@ repeat: * later. Real quota accounting is done at pages writeout * time. */ - if (vfs_dq_reserve_block(inode, md_needed + 1)) - return -EDQUOT; + ret = dquot_reserve_block(inode, md_needed + 1); + if (ret) + return ret; if (ext4_claim_free_blocks(sbi, md_needed + 1)) { - vfs_dq_release_reservation_block(inode, md_needed + 1); + dquot_release_reservation_block(inode, md_needed + 1); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { yield(); goto repeat; @@ -1914,7 +1941,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - vfs_dq_release_reservation_block(inode, to_free); + dquot_release_reservation_block(inode, to_free); } static void ext4_da_page_release_reservation(struct page *page, @@ -2091,6 +2118,8 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, } else if (buffer_mapped(bh)) BUG_ON(bh->b_blocknr != pblock); + if (buffer_uninit(exbh)) + set_buffer_uninit(bh); cur_logical++; pblock++; } while ((bh = bh->b_this_page) != head); @@ -2133,17 +2162,16 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, break; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; - index = page->index; - if (index > end) + if (page->index > end) break; - index++; - BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); block_invalidatepage(page, 0); ClearPageUptodate(page); unlock_page(page); } + index = pvec.pages[nr_pages - 1]->index + 1; + pagevec_release(&pvec); } return; } @@ -2220,6 +2248,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) */ new.b_state = 0; get_blocks_flags = EXT4_GET_BLOCKS_CREATE; + if (ext4_should_dioread_nolock(mpd->inode)) + get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; if (mpd->b_state & (1 << BH_Delay)) get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; @@ -2630,11 +2660,14 @@ static int __ext4_journalled_writepage(struct page *page, ret = err; walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); - EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; + ext4_set_inode_state(inode, EXT4_STATE_JDATA); out: return ret; } +static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); +static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); + /* * Note that we don't need to start a transaction unless we're journaling data * because we should have holes filled from ext4_page_mkwrite(). We even don't @@ -2682,7 +2715,7 @@ static int ext4_writepage(struct page *page, int ret = 0; loff_t size; unsigned int len; - struct buffer_head *page_bufs; + struct buffer_head *page_bufs = NULL; struct inode *inode = page->mapping->host; trace_ext4_writepage(inode, page); @@ -2758,7 +2791,11 @@ static int ext4_writepage(struct page *page, if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) ret = nobh_writepage(page, noalloc_get_block_write, wbc); - else + else if (page_bufs && buffer_uninit(page_bufs)) { + ext4_set_bh_endio(page_bufs, inode); + ret = block_write_full_page_endio(page, noalloc_get_block_write, + wbc, ext4_end_io_buffer_write); + } else ret = block_write_full_page(page, noalloc_get_block_write, wbc); @@ -3301,7 +3338,8 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) filemap_write_and_wait(mapping); } - if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { + if (EXT4_JOURNAL(inode) && + ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { /* * This is a REALLY heavyweight approach, but the use of * bmap on dirty files is expected to be extremely rare: @@ -3320,7 +3358,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) * everything they get. */ - EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; + ext4_clear_inode_state(inode, EXT4_STATE_JDATA); journal = EXT4_JOURNAL(inode); jbd2_journal_lock_updates(journal); err = jbd2_journal_flush(journal); @@ -3345,11 +3383,45 @@ ext4_readpages(struct file *file, struct address_space *mapping, return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); } +static void ext4_free_io_end(ext4_io_end_t *io) +{ + BUG_ON(!io); + if (io->page) + put_page(io->page); + iput(io->inode); + kfree(io); +} + +static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) +{ + struct buffer_head *head, *bh; + unsigned int curr_off = 0; + + if (!page_has_buffers(page)) + return; + head = bh = page_buffers(page); + do { + if (offset <= curr_off && test_clear_buffer_uninit(bh) + && bh->b_private) { + ext4_free_io_end(bh->b_private); + bh->b_private = NULL; + bh->b_end_io = NULL; + } + curr_off = curr_off + bh->b_size; + bh = bh->b_this_page; + } while (bh != head); +} + static void ext4_invalidatepage(struct page *page, unsigned long offset) { journal_t *journal = EXT4_JOURNAL(page->mapping->host); /* + * free any io_end structure allocated for buffers to be discarded + */ + if (ext4_should_dioread_nolock(page->mapping->host)) + ext4_invalidatepage_free_endio(page, offset); + /* * If it's a full truncate we just forget about the pending dirtying */ if (offset == 0) @@ -3420,7 +3492,14 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, } retry: - ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + if (rw == READ && ext4_should_dioread_nolock(inode)) + ret = blockdev_direct_IO_no_locking(rw, iocb, inode, + inode->i_sb->s_bdev, iov, + offset, nr_segs, + ext4_get_block, NULL); + else + ret = blockdev_direct_IO(rw, iocb, inode, + inode->i_sb->s_bdev, iov, offset, nr_segs, ext4_get_block, NULL); if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) @@ -3436,6 +3515,9 @@ retry: * but cannot extend i_size. Bail out and pretend * the write failed... */ ret = PTR_ERR(handle); + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + goto out; } if (inode->i_nlink) @@ -3463,75 +3545,63 @@ out: return ret; } -static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, +static int ext4_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - handle_t *handle = NULL; + handle_t *handle = ext4_journal_current_handle(); int ret = 0; unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; int dio_credits; + int started = 0; - ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", + ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", inode->i_ino, create); /* - * DIO VFS code passes create = 0 flag for write to - * the middle of file. It does this to avoid block - * allocation for holes, to prevent expose stale data - * out when there is parallel buffered read (which does - * not hold the i_mutex lock) while direct IO write has - * not completed. DIO request on holes finally falls back - * to buffered IO for this reason. - * - * For ext4 extent based file, since we support fallocate, - * new allocated extent as uninitialized, for holes, we - * could fallocate blocks for holes, thus parallel - * buffered IO read will zero out the page when read on - * a hole while parallel DIO write to the hole has not completed. - * - * when we come here, we know it's a direct IO write to - * to the middle of file (<i_size) - * so it's safe to override the create flag from VFS. + * ext4_get_block in prepare for a DIO write or buffer write. + * We allocate an uinitialized extent if blocks haven't been allocated. + * The extent will be converted to initialized after IO complete. */ - create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; + create = EXT4_GET_BLOCKS_IO_CREATE_EXT; - if (max_blocks > DIO_MAX_BLOCKS) - max_blocks = DIO_MAX_BLOCKS; - dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); - handle = ext4_journal_start(inode, dio_credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; + if (!handle) { + if (max_blocks > DIO_MAX_BLOCKS) + max_blocks = DIO_MAX_BLOCKS; + dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); + handle = ext4_journal_start(inode, dio_credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + started = 1; } + ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, create); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); ret = 0; } - ext4_journal_stop(handle); + if (started) + ext4_journal_stop(handle); out: return ret; } -static void ext4_free_io_end(ext4_io_end_t *io) -{ - BUG_ON(!io); - iput(io->inode); - kfree(io); -} -static void dump_aio_dio_list(struct inode * inode) +static void dump_completed_IO(struct inode * inode) { #ifdef EXT4_DEBUG struct list_head *cur, *before, *after; ext4_io_end_t *io, *io0, *io1; + unsigned long flags; - if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ - ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); + if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ + ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); return; } - ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); - list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ + ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); + spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); + list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ cur = &io->list; before = cur->prev; io0 = container_of(before, ext4_io_end_t, list); @@ -3541,32 +3611,31 @@ static void dump_aio_dio_list(struct inode * inode) ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", io, inode->i_ino, io0, io1); } + spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); #endif } /* * check a range of space and convert unwritten extents to written. */ -static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) +static int ext4_end_io_nolock(ext4_io_end_t *io) { struct inode *inode = io->inode; loff_t offset = io->offset; - size_t size = io->size; + ssize_t size = io->size; int ret = 0; - ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," + ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," "list->prev 0x%p\n", io, inode->i_ino, io->list.next, io->list.prev); if (list_empty(&io->list)) return ret; - if (io->flag != DIO_AIO_UNWRITTEN) + if (io->flag != EXT4_IO_UNWRITTEN) return ret; - if (offset + size <= i_size_read(inode)) - ret = ext4_convert_unwritten_extents(inode, offset, size); - + ret = ext4_convert_unwritten_extents(inode, offset, size); if (ret < 0) { printk(KERN_EMERG "%s: failed to convert unwritten" "extents to written extents, error is %d" @@ -3579,50 +3648,64 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) io->flag = 0; return ret; } + /* * work on completed aio dio IO, to convert unwritten extents to extents */ -static void ext4_end_aio_dio_work(struct work_struct *work) +static void ext4_end_io_work(struct work_struct *work) { - ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); - struct inode *inode = io->inode; - int ret = 0; + ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); + struct inode *inode = io->inode; + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned long flags; + int ret; mutex_lock(&inode->i_mutex); - ret = ext4_end_aio_dio_nolock(io); - if (ret >= 0) { - if (!list_empty(&io->list)) - list_del_init(&io->list); - ext4_free_io_end(io); + ret = ext4_end_io_nolock(io); + if (ret < 0) { + mutex_unlock(&inode->i_mutex); + return; } + + spin_lock_irqsave(&ei->i_completed_io_lock, flags); + if (!list_empty(&io->list)) + list_del_init(&io->list); + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); mutex_unlock(&inode->i_mutex); + ext4_free_io_end(io); } + /* * This function is called from ext4_sync_file(). * - * When AIO DIO IO is completed, the work to convert unwritten - * extents to written is queued on workqueue but may not get immediately + * When IO is completed, the work to convert unwritten extents to + * written is queued on workqueue but may not get immediately * scheduled. When fsync is called, we need to ensure the * conversion is complete before fsync returns. - * The inode keeps track of a list of completed AIO from DIO path - * that might needs to do the conversion. This function walks through - * the list and convert the related unwritten extents to written. + * The inode keeps track of a list of pending/completed IO that + * might needs to do the conversion. This function walks through + * the list and convert the related unwritten extents for completed IO + * to written. + * The function return the number of pending IOs on success. */ -int flush_aio_dio_completed_IO(struct inode *inode) +int flush_completed_IO(struct inode *inode) { ext4_io_end_t *io; + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned long flags; int ret = 0; int ret2 = 0; - if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) + if (list_empty(&ei->i_completed_io_list)) return ret; - dump_aio_dio_list(inode); - while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ - io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, + dump_completed_IO(inode); + spin_lock_irqsave(&ei->i_completed_io_lock, flags); + while (!list_empty(&ei->i_completed_io_list)){ + io = list_entry(ei->i_completed_io_list.next, ext4_io_end_t, list); /* - * Calling ext4_end_aio_dio_nolock() to convert completed + * Calling ext4_end_io_nolock() to convert completed * IO to written. * * When ext4_sync_file() is called, run_queue() may already @@ -3635,20 +3718,23 @@ int flush_aio_dio_completed_IO(struct inode *inode) * avoid double converting from both fsync and background work * queue work. */ - ret = ext4_end_aio_dio_nolock(io); + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); + ret = ext4_end_io_nolock(io); + spin_lock_irqsave(&ei->i_completed_io_lock, flags); if (ret < 0) ret2 = ret; else list_del_init(&io->list); } + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); return (ret2 < 0) ? ret2 : 0; } -static ext4_io_end_t *ext4_init_io_end (struct inode *inode) +static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) { ext4_io_end_t *io = NULL; - io = kmalloc(sizeof(*io), GFP_NOFS); + io = kmalloc(sizeof(*io), flags); if (io) { igrab(inode); @@ -3656,8 +3742,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode) io->flag = 0; io->offset = 0; io->size = 0; - io->error = 0; - INIT_WORK(&io->work, ext4_end_aio_dio_work); + io->page = NULL; + INIT_WORK(&io->work, ext4_end_io_work); INIT_LIST_HEAD(&io->list); } @@ -3669,6 +3755,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, { ext4_io_end_t *io_end = iocb->private; struct workqueue_struct *wq; + unsigned long flags; + struct ext4_inode_info *ei; /* if not async direct IO or dio with 0 bytes write, just return */ if (!io_end || !size) @@ -3680,7 +3768,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, size); /* if not aio dio with unwritten extents, just free io and return */ - if (io_end->flag != DIO_AIO_UNWRITTEN){ + if (io_end->flag != EXT4_IO_UNWRITTEN){ ext4_free_io_end(io_end); iocb->private = NULL; return; @@ -3688,16 +3776,85 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, io_end->offset = offset; io_end->size = size; + io_end->flag = EXT4_IO_UNWRITTEN; wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; /* queue the work to convert unwritten extents to written */ queue_work(wq, &io_end->work); /* Add the io_end to per-inode completed aio dio list*/ - list_add_tail(&io_end->list, - &EXT4_I(io_end->inode)->i_aio_dio_complete_list); + ei = EXT4_I(io_end->inode); + spin_lock_irqsave(&ei->i_completed_io_lock, flags); + list_add_tail(&io_end->list, &ei->i_completed_io_list); + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); iocb->private = NULL; } + +static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) +{ + ext4_io_end_t *io_end = bh->b_private; + struct workqueue_struct *wq; + struct inode *inode; + unsigned long flags; + + if (!test_clear_buffer_uninit(bh) || !io_end) + goto out; + + if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) { + printk("sb umounted, discard end_io request for inode %lu\n", + io_end->inode->i_ino); + ext4_free_io_end(io_end); + goto out; + } + + io_end->flag = EXT4_IO_UNWRITTEN; + inode = io_end->inode; + + /* Add the io_end to per-inode completed io list*/ + spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); + list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); + spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); + + wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; + /* queue the work to convert unwritten extents to written */ + queue_work(wq, &io_end->work); +out: + bh->b_private = NULL; + bh->b_end_io = NULL; + clear_buffer_uninit(bh); + end_buffer_async_write(bh, uptodate); +} + +static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode) +{ + ext4_io_end_t *io_end; + struct page *page = bh->b_page; + loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT; + size_t size = bh->b_size; + +retry: + io_end = ext4_init_io_end(inode, GFP_ATOMIC); + if (!io_end) { + if (printk_ratelimit()) + printk(KERN_WARNING "%s: allocation fail\n", __func__); + schedule(); + goto retry; + } + io_end->offset = offset; + io_end->size = size; + /* + * We need to hold a reference to the page to make sure it + * doesn't get evicted before ext4_end_io_work() has a chance + * to convert the extent from written to unwritten. + */ + io_end->page = page; + get_page(io_end->page); + + bh->b_private = io_end; + bh->b_end_io = ext4_end_io_buffer_write; + return 0; +} + /* * For ext4 extent files, ext4 will do direct-io write to holes, * preallocated extents, and those write extend the file, no need to @@ -3751,7 +3908,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, iocb->private = NULL; EXT4_I(inode)->cur_aio_dio = NULL; if (!is_sync_kiocb(iocb)) { - iocb->private = ext4_init_io_end(inode); + iocb->private = ext4_init_io_end(inode, GFP_NOFS); if (!iocb->private) return -ENOMEM; /* @@ -3767,7 +3924,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, - ext4_get_block_dio_write, + ext4_get_block_write, ext4_end_io_dio); if (iocb->private) EXT4_I(inode)->cur_aio_dio = NULL; @@ -3788,8 +3945,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { ext4_free_io_end(iocb->private); iocb->private = NULL; - } else if (ret > 0 && (EXT4_I(inode)->i_state & - EXT4_STATE_DIO_UNWRITTEN)) { + } else if (ret > 0 && ext4_test_inode_state(inode, + EXT4_STATE_DIO_UNWRITTEN)) { int err; /* * for non AIO case, since the IO is already @@ -3799,7 +3956,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, offset, ret); if (err < 0) ret = err; - EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN; + ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); } return ret; } @@ -4130,18 +4287,27 @@ no_top: * We release `count' blocks on disk, but (last - first) may be greater * than `count' because there can be holes in there. */ -static void ext4_clear_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, - ext4_fsblk_t block_to_free, - unsigned long count, __le32 *first, - __le32 *last) +static int ext4_clear_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, + ext4_fsblk_t block_to_free, + unsigned long count, __le32 *first, + __le32 *last) { __le32 *p; - int flags = EXT4_FREE_BLOCKS_FORGET; + int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) flags |= EXT4_FREE_BLOCKS_METADATA; + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, + count)) { + ext4_error(inode->i_sb, "inode #%lu: " + "attempt to clear blocks %llu len %lu, invalid", + inode->i_ino, (unsigned long long) block_to_free, + count); + return 1; + } + if (try_to_extend_transaction(handle, inode)) { if (bh) { BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); @@ -4160,6 +4326,7 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, *p = 0; ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); + return 0; } /** @@ -4215,9 +4382,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, } else if (nr == block_to_free + count) { count++; } else { - ext4_clear_blocks(handle, inode, this_bh, - block_to_free, - count, block_to_free_p, p); + if (ext4_clear_blocks(handle, inode, this_bh, + block_to_free, count, + block_to_free_p, p)) + break; block_to_free = nr; block_to_free_p = p; count = 1; @@ -4241,7 +4409,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) ext4_handle_dirty_metadata(handle, inode, this_bh); else - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, "circular indirect block detected, " "inode=%lu, block=%llu", inode->i_ino, @@ -4281,6 +4449,16 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, if (!nr) continue; /* A hole */ + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), + nr, 1)) { + ext4_error(inode->i_sb, + "indirect mapped block in inode " + "#%lu invalid (level %d, blk #%lu)", + inode->i_ino, depth, + (unsigned long) nr); + break; + } + /* Go read the buffer for the next level down */ bh = sb_bread(inode->i_sb, nr); @@ -4289,7 +4467,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, * (should be rare). */ if (!bh) { - ext4_error(inode->i_sb, "ext4_free_branches", + ext4_error(inode->i_sb, "Read failure, inode=%lu, block=%llu", inode->i_ino, nr); continue; @@ -4433,8 +4611,10 @@ void ext4_truncate(struct inode *inode) if (!ext4_can_truncate(inode)) return; + EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; + if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) - ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; + ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { ext4_ext_truncate(inode); @@ -4604,9 +4784,8 @@ static int __ext4_get_inode_loc(struct inode *inode, bh = sb_getblk(sb, block); if (!bh) { - ext4_error(sb, "ext4_get_inode_loc", "unable to read " - "inode block - inode=%lu, block=%llu", - inode->i_ino, block); + ext4_error(sb, "unable to read inode block - " + "inode=%lu, block=%llu", inode->i_ino, block); return -EIO; } if (!buffer_uptodate(bh)) { @@ -4704,9 +4883,8 @@ make_io: submit_bh(READ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { - ext4_error(sb, __func__, - "unable to read inode block - inode=%lu, " - "block=%llu", inode->i_ino, block); + ext4_error(sb, "unable to read inode block - inode=%lu," + " block=%llu", inode->i_ino, block); brelse(bh); return -EIO; } @@ -4720,7 +4898,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) { /* We have all inode data except xattrs in memory here. */ return __ext4_get_inode_loc(inode, iloc, - !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); + !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); } void ext4_set_inode_flags(struct inode *inode) @@ -4814,7 +4992,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) } inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); - ei->i_state = 0; + ei->i_state_flags = 0; ei->i_dir_start_lookup = 0; ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); /* We now have enough fields to check if the inode was active or not. @@ -4897,7 +5075,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) - ei->i_state |= EXT4_STATE_XATTR; + ext4_set_inode_state(inode, EXT4_STATE_XATTR); } } else ei->i_extra_isize = 0; @@ -4917,8 +5095,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = 0; if (ei->i_file_acl && !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { - ext4_error(sb, __func__, - "bad extended attribute block %llu in inode #%lu", + ext4_error(sb, "bad extended attribute block %llu inode #%lu", ei->i_file_acl, inode->i_ino); ret = -EIO; goto bad_inode; @@ -4964,8 +5141,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); } else { ret = -EIO; - ext4_error(inode->i_sb, __func__, - "bogus i_mode (%o) for inode=%lu", + ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu", inode->i_mode, inode->i_ino); goto bad_inode; } @@ -5037,7 +5213,7 @@ static int ext4_do_update_inode(handle_t *handle, /* For fields not not tracking in the in-memory inode, * initialise them to zero for new inodes. */ - if (ei->i_state & EXT4_STATE_NEW) + if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); ext4_get_inode_flags(ei); @@ -5101,7 +5277,7 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_FEATURE_RO_COMPAT_LARGE_FILE); sb->s_dirt = 1; ext4_handle_sync(handle); - err = ext4_handle_dirty_metadata(handle, inode, + err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); } } @@ -5130,10 +5306,10 @@ static int ext4_do_update_inode(handle_t *handle, } BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - rc = ext4_handle_dirty_metadata(handle, inode, bh); + rc = ext4_handle_dirty_metadata(handle, NULL, bh); if (!err) err = rc; - ei->i_state &= ~EXT4_STATE_NEW; + ext4_clear_inode_state(inode, EXT4_STATE_NEW); ext4_update_inode_fsync_trans(handle, inode, 0); out_brelse: @@ -5177,7 +5353,7 @@ out_brelse: * `stuff()' is running, and the new i_size will be lost. Plus the inode * will no longer be on the superblock's dirty inode list. */ -int ext4_write_inode(struct inode *inode, int wait) +int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) { int err; @@ -5191,7 +5367,7 @@ int ext4_write_inode(struct inode *inode, int wait) return -EIO; } - if (!wait) + if (wbc->sync_mode != WB_SYNC_ALL) return 0; err = ext4_force_commit(inode->i_sb); @@ -5201,13 +5377,11 @@ int ext4_write_inode(struct inode *inode, int wait) err = ext4_get_inode_loc(inode, &iloc); if (err) return err; - if (wait) + if (wbc->sync_mode == WB_SYNC_ALL) sync_dirty_buffer(iloc.bh); if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { - ext4_error(inode->i_sb, __func__, - "IO error syncing inode, " - "inode=%lu, block=%llu", - inode->i_ino, + ext4_error(inode->i_sb, "IO error syncing inode, " + "inode=%lu, block=%llu", inode->i_ino, (unsigned long long)iloc.bh->b_blocknr); err = -EIO; } @@ -5249,6 +5423,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; + if (ia_valid & ATTR_SIZE) + dquot_initialize(inode); if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { handle_t *handle; @@ -5261,7 +5437,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) error = PTR_ERR(handle); goto err_out; } - error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; + error = dquot_transfer(inode, attr); if (error) { ext4_journal_stop(handle); return error; @@ -5288,7 +5464,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } if (S_ISREG(inode->i_mode) && - attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { + attr->ia_valid & ATTR_SIZE && + (attr->ia_size < inode->i_size || + (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) { handle_t *handle; handle = ext4_journal_start(inode, 3); @@ -5319,6 +5497,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) goto err_out; } } + /* ext4_truncate will clear the flag */ + if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) + ext4_truncate(inode); } rc = inode_setattr(inode, attr); @@ -5557,8 +5738,8 @@ static int ext4_expand_extra_isize(struct inode *inode, entry = IFIRST(header); /* No extended attributes present */ - if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || - header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { + if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || + header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, new_extra_isize); EXT4_I(inode)->i_extra_isize = new_extra_isize; @@ -5602,7 +5783,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) err = ext4_reserve_inode_write(handle, inode, &iloc); if (ext4_handle_valid(handle) && EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && - !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { + !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { /* * We need extra buffer credits since we may write into EA block * with this same handle. If journal_extend fails, then it will @@ -5616,10 +5797,11 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) sbi->s_want_extra_isize, iloc, handle); if (ret) { - EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; + ext4_set_inode_state(inode, + EXT4_STATE_NO_EXPAND); if (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count)) { - ext4_warning(inode->i_sb, __func__, + ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete" " some EAs or run e2fsck.", inode->i_ino); @@ -5641,7 +5823,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) * i_size has been changed by generic_commit_write() and we thus need * to include the updated inode in the current transaction. * - * Also, vfs_dq_alloc_block() will always dirty the inode when blocks + * Also, dquot_alloc_block() will always dirty the inode when blocks * are allocated to the file. * * If the inode is marked synchronous, we don't honour that here - doing @@ -5683,7 +5865,7 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode) err = jbd2_journal_get_write_access(handle, iloc.bh); if (!err) err = ext4_handle_dirty_metadata(handle, - inode, + NULL, iloc.bh); brelse(iloc.bh); } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index b63d193126d..016d0249294 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -92,6 +92,15 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) flags &= ~EXT4_EXTENTS_FL; } + if (flags & EXT4_EOFBLOCKS_FL) { + /* we don't support adding EOFBLOCKS flag */ + if (!(oldflags & EXT4_EOFBLOCKS_FL)) { + err = -EOPNOTSUPP; + goto flags_out; + } + } else if (oldflags & EXT4_EOFBLOCKS_FL) + ext4_truncate(inode); + handle = ext4_journal_start(inode, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); @@ -249,7 +258,8 @@ setversion_out: if (me.moved_len > 0) file_remove_suid(donor_filp); - if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) + if (copy_to_user((struct move_extent __user *)arg, + &me, sizeof(me))) err = -EFAULT; mext_out: fput(donor_filp); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d34afad3e13..506713a2ebd 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -441,10 +441,9 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, for (i = 0; i < count; i++) { if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { ext4_fsblk_t blocknr; - blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); + + blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += first + i; - blocknr += - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); ext4_grp_locked_error(sb, e4b->bd_group, __func__, "double-free of inode" " %lu's block %llu(bit %u in group %u)", @@ -1255,10 +1254,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { ext4_fsblk_t blocknr; - blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); + + blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += block; - blocknr += - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); ext4_grp_locked_error(sb, e4b->bd_group, __func__, "double-free of inode" " %lu's block %llu(bit %u in group %u)", @@ -1631,7 +1629,6 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, int max; int err; struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); - struct ext4_super_block *es = sbi->s_es; struct ext4_free_extent ex; if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) @@ -1648,8 +1645,8 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { ext4_fsblk_t start; - start = (e4b->bd_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) + - ex.fe_start + le32_to_cpu(es->s_first_data_block); + start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) + + ex.fe_start; /* use do_div to get remainder (would be 64-bit modulo) */ if (do_div(start, sbi->s_stripe) == 0) { ac->ac_found++; @@ -1803,8 +1800,8 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, BUG_ON(sbi->s_stripe == 0); /* find first stripe-aligned block in group */ - first_group_block = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb) - + le32_to_cpu(sbi->s_es->s_first_data_block); + first_group_block = ext4_group_first_block_no(sb, e4b->bd_group); + a = first_group_block + sbi->s_stripe - 1; do_div(a, sbi->s_stripe); i = (a * sbi->s_stripe) - first_group_block; @@ -2256,7 +2253,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); init_rwsem(&meta_group_info[i]->alloc_sem); - meta_group_info[i]->bb_free_root.rb_node = NULL; + meta_group_info[i]->bb_free_root = RB_ROOT; #ifdef DOUBLE_CHECK { @@ -2560,12 +2557,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) ext4_unlock_group(sb, entry->group); if (test_opt(sb, DISCARD)) { ext4_fsblk_t discard_block; - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - discard_block = (ext4_fsblk_t)entry->group * - EXT4_BLOCKS_PER_GROUP(sb) - + entry->start_blk - + le32_to_cpu(es->s_first_data_block); + discard_block = entry->start_blk + + ext4_group_first_block_no(sb, entry->group); trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, entry->count); @@ -2703,14 +2697,11 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (err) goto out_err; - block = ac->ac_b_ex.fe_group * EXT4_BLOCKS_PER_GROUP(sb) - + ac->ac_b_ex.fe_start - + le32_to_cpu(es->s_first_data_block); + block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); len = ac->ac_b_ex.fe_len; if (!ext4_data_block_valid(sbi, block, len)) { - ext4_error(sb, __func__, - "Allocating blocks %llu-%llu which overlap " + ext4_error(sb, "Allocating blocks %llu-%llu which overlap " "fs metadata\n", block, block+len); /* File system mounted not to panic on error * Fix the bitmap and repeat the block allocation @@ -3161,9 +3152,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) /* The max size of hash table is PREALLOC_TB_SIZE */ order = PREALLOC_TB_SIZE - 1; - goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + - ac->ac_g_ex.fe_start + - le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block); + goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex); /* * search for the prealloc space that is having * minimal distance from the goal block. @@ -3526,8 +3515,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, if (bit >= end) break; next = mb_find_next_bit(bitmap_bh->b_data, end, bit); - start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + - le32_to_cpu(sbi->s_es->s_first_data_block); + start = ext4_group_first_block_no(sb, group) + bit; mb_debug(1, " free preallocated %u/%u in group %u\n", (unsigned) start, (unsigned) next - bit, (unsigned) group); @@ -3623,15 +3611,13 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, bitmap_bh = ext4_read_block_bitmap(sb, group); if (bitmap_bh == NULL) { - ext4_error(sb, __func__, "Error in reading block " - "bitmap for %u", group); + ext4_error(sb, "Error reading block bitmap for %u", group); return 0; } err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { - ext4_error(sb, __func__, "Error in loading buddy " - "information for %u", group); + ext4_error(sb, "Error loading buddy information for %u", group); put_bh(bitmap_bh); return 0; } @@ -3804,15 +3790,15 @@ repeat: err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { - ext4_error(sb, __func__, "Error in loading buddy " - "information for %u", group); + ext4_error(sb, "Error loading buddy information for %u", + group); continue; } bitmap_bh = ext4_read_block_bitmap(sb, group); if (bitmap_bh == NULL) { - ext4_error(sb, __func__, "Error in reading block " - "bitmap for %u", group); + ext4_error(sb, "Error reading block bitmap for %u", + group); ext4_mb_release_desc(&e4b); continue; } @@ -3938,7 +3924,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) /* don't use group allocation for large files */ size = max(size, isize); - if (size >= sbi->s_mb_stream_request) { + if (size > sbi->s_mb_stream_request) { ac->ac_flags |= EXT4_MB_STREAM_ALLOC; return; } @@ -4077,8 +4063,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); if (ext4_mb_load_buddy(sb, group, &e4b)) { - ext4_error(sb, __func__, "Error in loading buddy " - "information for %u", group); + ext4_error(sb, "Error loading buddy information for %u", + group); continue; } ext4_lock_group(sb, group); @@ -4254,7 +4240,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, return 0; } reserv_blks = ar->len; - while (ar->len && vfs_dq_alloc_block(ar->inode, ar->len)) { + while (ar->len && dquot_alloc_block(ar->inode, ar->len)) { ar->flags |= EXT4_MB_HINT_NOPREALLOC; ar->len--; } @@ -4331,7 +4317,7 @@ out2: kmem_cache_free(ext4_ac_cachep, ac); out1: if (inquota && ar->len < inquota) - vfs_dq_free_block(ar->inode, inquota - ar->len); + dquot_free_block(ar->inode, inquota - ar->len); out3: if (!ar->len) { if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) @@ -4476,10 +4462,10 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, sbi = EXT4_SB(sb); es = EXT4_SB(sb)->s_es; - if (!ext4_data_block_valid(sbi, block, count)) { - ext4_error(sb, __func__, - "Freeing blocks not in datazone - " - "block = %llu, count = %lu", block, count); + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && + !ext4_data_block_valid(sbi, block, count)) { + ext4_error(sb, "Freeing blocks not in datazone - " + "block = %llu, count = %lu", block, count); goto error_return; } @@ -4547,8 +4533,7 @@ do_more: in_range(block + count - 1, ext4_inode_table(sb, gdp), EXT4_SB(sb)->s_itb_per_group)) { - ext4_error(sb, __func__, - "Freeing blocks in system zone - " + ext4_error(sb, "Freeing blocks in system zone - " "Block = %llu, count = %lu", block, count); /* err = 0. ext4_std_error should be a no op */ goto error_return; @@ -4646,7 +4631,7 @@ do_more: sb->s_dirt = 1; error_return: if (freed) - vfs_dq_free_block(inode, freed); + dquot_free_block(inode, freed); brelse(bitmap_bh); ext4_std_error(sb, err); if (ac) diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 436521cae45..b619322c76f 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -220,16 +220,9 @@ struct ext4_buddy { #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) -#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) - static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, struct ext4_free_extent *fex) { - ext4_fsblk_t block; - - block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb) - + fex->fe_start - + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - return block; + return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start; } #endif diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 81415814b00..8b87bd0eac9 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -365,12 +365,12 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, * happened after we started the migrate. We need to * fail the migrate */ - if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) { + if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) { retval = -EAGAIN; up_write(&EXT4_I(inode)->i_data_sem); goto err_out; } else - EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; + ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); /* * We have the extent map build with the tmp inode. * Now copy the i_data across @@ -503,14 +503,10 @@ int ext4_ext_migrate(struct inode *inode) } i_size_write(tmp_inode, i_size_read(inode)); /* - * We don't want the inode to be reclaimed - * if we got interrupted in between. We have - * this tmp inode carrying reference to the - * data blocks of the original file. We set - * the i_nlink to zero at the last stage after - * switching the original file to extent format + * Set the i_nlink to zero so it will be deleted later + * when we drop inode reference. */ - tmp_inode->i_nlink = 1; + tmp_inode->i_nlink = 0; ext4_ext_tree_init(handle, tmp_inode); ext4_orphan_add(handle, tmp_inode); @@ -533,10 +529,20 @@ int ext4_ext_migrate(struct inode *inode) * allocation. */ down_read((&EXT4_I(inode)->i_data_sem)); - EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE; + ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE); up_read((&EXT4_I(inode)->i_data_sem)); handle = ext4_journal_start(inode, 1); + if (IS_ERR(handle)) { + /* + * It is impossible to update on-disk structures without + * a handle, so just rollback in-core changes and live other + * work to orphan_list_cleanup() + */ + ext4_orphan_del(NULL, tmp_inode); + retval = PTR_ERR(handle); + goto out; + } ei = EXT4_I(inode); i_data = ei->i_data; @@ -618,15 +624,8 @@ err_out: /* Reset the extent details */ ext4_ext_tree_init(handle, tmp_inode); - - /* - * Set the i_nlink to zero so that - * generic_drop_inode really deletes the - * inode - */ - tmp_inode->i_nlink = 0; - ext4_journal_stop(handle); +out: unlock_new_inode(tmp_inode); iput(tmp_inode); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 82c415be87a..aa5fe28d180 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -152,12 +152,12 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2, int ret = 0; if (inode1 == NULL) { - ext4_error(inode2->i_sb, function, + __ext4_error(inode2->i_sb, function, "Both inodes should not be NULL: " "inode1 NULL inode2 %lu", inode2->i_ino); ret = -EIO; } else if (inode2 == NULL) { - ext4_error(inode1->i_sb, function, + __ext4_error(inode1->i_sb, function, "Both inodes should not be NULL: " "inode1 %lu inode2 NULL", inode1->i_ino); ret = -EIO; @@ -252,6 +252,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, } o_start->ee_len = start_ext->ee_len; + eblock = le32_to_cpu(start_ext->ee_block); new_flag = 1; } else if (start_ext->ee_len && new_ext->ee_len && @@ -262,6 +263,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, * orig |------------------------------| */ o_start->ee_len = start_ext->ee_len; + eblock = le32_to_cpu(start_ext->ee_block); new_flag = 1; } else if (!start_ext->ee_len && new_ext->ee_len && @@ -475,7 +477,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, struct ext4_extent *oext, *o_start, *o_end, *prev_ext; struct ext4_extent new_ext, start_ext, end_ext; ext4_lblk_t new_ext_end; - ext4_fsblk_t new_phys_end; int oext_alen, new_ext_alen, end_ext_alen; int depth = ext_depth(orig_inode); int ret; @@ -489,7 +490,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, new_ext.ee_len = dext->ee_len; new_ext_alen = ext4_ext_get_actual_len(&new_ext); new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; - new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1; /* * Case: original extent is first @@ -502,6 +502,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, le32_to_cpu(oext->ee_block) + oext_alen) { start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - le32_to_cpu(oext->ee_block)); + start_ext.ee_block = oext->ee_block; copy_extent_status(oext, &start_ext); } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { prev_ext = oext - 1; @@ -515,6 +516,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, start_ext.ee_len = cpu_to_le16( ext4_ext_get_actual_len(prev_ext) + new_ext_alen); + start_ext.ee_block = oext->ee_block; copy_extent_status(prev_ext, &start_ext); new_ext.ee_len = 0; } @@ -526,7 +528,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, * new_ext |-------| */ if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { - ext4_error(orig_inode->i_sb, __func__, + ext4_error(orig_inode->i_sb, "new_ext_end(%u) should be less than or equal to " "oext->ee_block(%u) + oext_alen(%d) - 1", new_ext_end, le32_to_cpu(oext->ee_block), @@ -689,12 +691,12 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, while (1) { /* The extent for donor must be found. */ if (!dext) { - ext4_error(donor_inode->i_sb, __func__, + ext4_error(donor_inode->i_sb, "The extent for donor must be found"); *err = -EIO; goto out; } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { - ext4_error(donor_inode->i_sb, __func__, + ext4_error(donor_inode->i_sb, "Donor offset(%u) and the first block of donor " "extent(%u) should be equal", donor_off, @@ -928,7 +930,7 @@ out2: } /** - * mext_check_argumants - Check whether move extent can be done + * mext_check_arguments - Check whether move extent can be done * * @orig_inode: original inode * @donor_inode: donor inode @@ -949,14 +951,6 @@ mext_check_arguments(struct inode *orig_inode, unsigned int blkbits = orig_inode->i_blkbits; unsigned int blocksize = 1 << blkbits; - /* Regular file check */ - if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { - ext4_debug("ext4 move extent: The argument files should be " - "regular file [ino:orig %lu, donor %lu]\n", - orig_inode->i_ino, donor_inode->i_ino); - return -EINVAL; - } - if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { ext4_debug("ext4 move extent: suid or sgid is set" " to donor file [ino:orig %lu, donor %lu]\n", @@ -1204,6 +1198,14 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, return -EINVAL; } + /* Regular file check */ + if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { + ext4_debug("ext4 move extent: The argument files should be " + "regular file [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + /* Protect orig and donor inodes against a truncate */ ret1 = mext_inode_double_lock(orig_inode, donor_inode); if (ret1 < 0) @@ -1351,7 +1353,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, if (ret1 < 0) break; if (*moved_len > len) { - ext4_error(orig_inode->i_sb, __func__, + ext4_error(orig_inode->i_sb, "We replaced blocks too much! " "sum of replaced: %llu requested: %llu", *moved_len, len); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 17a17e10dd6..0c070fabd10 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -383,8 +383,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, if (root->info.hash_version != DX_HASH_TEA && root->info.hash_version != DX_HASH_HALF_MD4 && root->info.hash_version != DX_HASH_LEGACY) { - ext4_warning(dir->i_sb, __func__, - "Unrecognised inode hash code %d", + ext4_warning(dir->i_sb, "Unrecognised inode hash code %d", root->info.hash_version); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -399,8 +398,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, hash = hinfo->hash; if (root->info.unused_flags & 1) { - ext4_warning(dir->i_sb, __func__, - "Unimplemented inode hash flags: %#06x", + ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x", root->info.unused_flags); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -408,8 +406,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, } if ((indirect = root->info.indirect_levels) > 1) { - ext4_warning(dir->i_sb, __func__, - "Unimplemented inode hash depth: %#06x", + ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x", root->info.indirect_levels); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -421,8 +418,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, if (dx_get_limit(entries) != dx_root_limit(dir, root->info.info_length)) { - ext4_warning(dir->i_sb, __func__, - "dx entry: limit != root limit"); + ext4_warning(dir->i_sb, "dx entry: limit != root limit"); brelse(bh); *err = ERR_BAD_DX_DIR; goto fail; @@ -433,7 +429,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, { count = dx_get_count(entries); if (!count || count > dx_get_limit(entries)) { - ext4_warning(dir->i_sb, __func__, + ext4_warning(dir->i_sb, "dx entry: no count or count > limit"); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -478,7 +474,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit (dir)) { - ext4_warning(dir->i_sb, __func__, + ext4_warning(dir->i_sb, "dx entry: limit != node limit"); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -494,7 +490,7 @@ fail2: } fail: if (*err == ERR_BAD_DX_DIR) - ext4_warning(dir->i_sb, __func__, + ext4_warning(dir->i_sb, "Corrupt dir inode %ld, running e2fsck is " "recommended.", dir->i_ino); return NULL; @@ -947,9 +943,8 @@ restart: wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* read error, skip block & hope for the best */ - ext4_error(sb, __func__, "reading directory #%lu " - "offset %lu", dir->i_ino, - (unsigned long)block); + ext4_error(sb, "reading directory #%lu offset %lu", + dir->i_ino, (unsigned long)block); brelse(bh); goto next; } @@ -1041,7 +1036,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q retval = ext4_htree_next_block(dir, hash, frame, frames, NULL); if (retval < 0) { - ext4_warning(sb, __func__, + ext4_warning(sb, "error reading index page in directory #%lu", dir->i_ino); *err = retval; @@ -1071,14 +1066,13 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru __u32 ino = le32_to_cpu(de->inode); brelse(bh); if (!ext4_valid_inum(dir->i_sb, ino)) { - ext4_error(dir->i_sb, "ext4_lookup", - "bad inode number: %u", ino); + ext4_error(dir->i_sb, "bad inode number: %u", ino); return ERR_PTR(-EIO); } inode = ext4_iget(dir->i_sb, ino); if (unlikely(IS_ERR(inode))) { if (PTR_ERR(inode) == -ESTALE) { - ext4_error(dir->i_sb, __func__, + ext4_error(dir->i_sb, "deleted inode referenced: %u", ino); return ERR_PTR(-EIO); @@ -1110,7 +1104,7 @@ struct dentry *ext4_get_parent(struct dentry *child) brelse(bh); if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { - ext4_error(child->d_inode->i_sb, "ext4_get_parent", + ext4_error(child->d_inode->i_sb, "bad inode number: %u", ino); return ERR_PTR(-EIO); } @@ -1410,7 +1404,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, de = (struct ext4_dir_entry_2 *)((char *)fde + ext4_rec_len_from_disk(fde->rec_len, blocksize)); if ((char *) de >= (((char *) root) + blocksize)) { - ext4_error(dir->i_sb, __func__, + ext4_error(dir->i_sb, "invalid rec_len for '..' in inode %lu", dir->i_ino); brelse(bh); @@ -1575,8 +1569,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (levels && (dx_get_count(frames->entries) == dx_get_limit(frames->entries))) { - ext4_warning(sb, __func__, - "Directory index full!"); + ext4_warning(sb, "Directory index full!"); err = -ENOSPC; goto cleanup; } @@ -1766,6 +1759,8 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, struct inode *inode; int err, retries = 0; + dquot_initialize(dir); + retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -1800,6 +1795,8 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; + dquot_initialize(dir); + retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -1837,6 +1834,8 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; + dquot_initialize(dir); + retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -1916,11 +1915,11 @@ static int empty_dir(struct inode *inode) if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { if (err) - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, "error %d reading directory #%lu offset 0", err, inode->i_ino); else - ext4_warning(inode->i_sb, __func__, + ext4_warning(inode->i_sb, "bad directory (dir #%lu) - no data block", inode->i_ino); return 1; @@ -1931,7 +1930,7 @@ static int empty_dir(struct inode *inode) !le32_to_cpu(de1->inode) || strcmp(".", de->name) || strcmp("..", de1->name)) { - ext4_warning(inode->i_sb, "empty_dir", + ext4_warning(inode->i_sb, "bad directory (dir #%lu) - no `.' or `..'", inode->i_ino); brelse(bh); @@ -1949,7 +1948,7 @@ static int empty_dir(struct inode *inode) offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); if (!bh) { if (err) - ext4_error(sb, __func__, + ext4_error(sb, "error %d reading directory" " #%lu offset %u", err, inode->i_ino, offset); @@ -2020,11 +2019,18 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) goto out_unlock; + /* + * Due to previous errors inode may be already a part of on-disk + * orphan list. If so skip on-disk list modification. + */ + if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <= + (le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) + goto mem_insert; /* Insert this inode at the head of the on-disk orphan list... */ NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh); + err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; @@ -2037,6 +2043,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) * * This is safe: on error we're going to ignore the orphan list * anyway on the next recovery. */ +mem_insert: if (!err) list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); @@ -2096,7 +2103,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) if (err) goto out_brelse; sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); - err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); } else { struct ext4_iloc iloc2; struct inode *i_prev = @@ -2136,7 +2143,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go in * separate transaction */ - vfs_dq_init(dentry->d_inode); + dquot_initialize(dir); + dquot_initialize(dentry->d_inode); + handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2163,7 +2172,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) if (retval) goto end_rmdir; if (!EXT4_DIR_LINK_EMPTY(inode)) - ext4_warning(inode->i_sb, "ext4_rmdir", + ext4_warning(inode->i_sb, "empty directory has too many links (%d)", inode->i_nlink); inode->i_version++; @@ -2195,7 +2204,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go * in separate transaction */ - vfs_dq_init(dentry->d_inode); + dquot_initialize(dir); + dquot_initialize(dentry->d_inode); + handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2215,7 +2226,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) goto end_unlink; if (!inode->i_nlink) { - ext4_warning(inode->i_sb, "ext4_unlink", + ext4_warning(inode->i_sb, "Deleting nonexistent file (%lu), %d", inode->i_ino, inode->i_nlink); inode->i_nlink = 1; @@ -2250,6 +2261,8 @@ static int ext4_symlink(struct inode *dir, if (l > dir->i_sb->s_blocksize) return -ENAMETOOLONG; + dquot_initialize(dir); + retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + @@ -2308,6 +2321,8 @@ static int ext4_link(struct dentry *old_dentry, if (inode->i_nlink >= EXT4_LINK_MAX) return -EMLINK; + dquot_initialize(dir); + /* * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing * otherwise has the potential to corrupt the orphan inode list. @@ -2358,12 +2373,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, struct ext4_dir_entry_2 *old_de, *new_de; int retval, force_da_alloc = 0; + dquot_initialize(old_dir); + dquot_initialize(new_dir); + old_bh = new_bh = dir_bh = NULL; /* Initialize quotas before so that eventual writes go * in separate transaction */ if (new_dentry->d_inode) - vfs_dq_init(new_dentry->d_inode); + dquot_initialize(new_dentry->d_inode); handle = ext4_journal_start(old_dir, 2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); @@ -2462,7 +2480,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, } } if (retval) { - ext4_warning(old_dir->i_sb, "ext4_rename", + ext4_warning(old_dir->i_sb, "Deleting old file (%lu), %d, error=%d", old_dir->i_ino, old_dir->i_nlink, retval); } diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3b2c5541d8a..5692c48754a 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -48,65 +48,54 @@ static int verify_group_input(struct super_block *sb, ext4_get_group_no_and_offset(sb, start, NULL, &offset); if (group != sbi->s_groups_count) - ext4_warning(sb, __func__, - "Cannot add at group %u (only %u groups)", + ext4_warning(sb, "Cannot add at group %u (only %u groups)", input->group, sbi->s_groups_count); else if (offset != 0) - ext4_warning(sb, __func__, "Last group not full"); + ext4_warning(sb, "Last group not full"); else if (input->reserved_blocks > input->blocks_count / 5) - ext4_warning(sb, __func__, "Reserved blocks too high (%u)", + ext4_warning(sb, "Reserved blocks too high (%u)", input->reserved_blocks); else if (free_blocks_count < 0) - ext4_warning(sb, __func__, "Bad blocks count %u", + ext4_warning(sb, "Bad blocks count %u", input->blocks_count); else if (!(bh = sb_bread(sb, end - 1))) - ext4_warning(sb, __func__, - "Cannot read last block (%llu)", + ext4_warning(sb, "Cannot read last block (%llu)", end - 1); else if (outside(input->block_bitmap, start, end)) - ext4_warning(sb, __func__, - "Block bitmap not in group (block %llu)", + ext4_warning(sb, "Block bitmap not in group (block %llu)", (unsigned long long)input->block_bitmap); else if (outside(input->inode_bitmap, start, end)) - ext4_warning(sb, __func__, - "Inode bitmap not in group (block %llu)", + ext4_warning(sb, "Inode bitmap not in group (block %llu)", (unsigned long long)input->inode_bitmap); else if (outside(input->inode_table, start, end) || outside(itend - 1, start, end)) - ext4_warning(sb, __func__, - "Inode table not in group (blocks %llu-%llu)", + ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)", (unsigned long long)input->inode_table, itend - 1); else if (input->inode_bitmap == input->block_bitmap) - ext4_warning(sb, __func__, - "Block bitmap same as inode bitmap (%llu)", + ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)", (unsigned long long)input->block_bitmap); else if (inside(input->block_bitmap, input->inode_table, itend)) - ext4_warning(sb, __func__, - "Block bitmap (%llu) in inode table (%llu-%llu)", + ext4_warning(sb, "Block bitmap (%llu) in inode table " + "(%llu-%llu)", (unsigned long long)input->block_bitmap, (unsigned long long)input->inode_table, itend - 1); else if (inside(input->inode_bitmap, input->inode_table, itend)) - ext4_warning(sb, __func__, - "Inode bitmap (%llu) in inode table (%llu-%llu)", + ext4_warning(sb, "Inode bitmap (%llu) in inode table " + "(%llu-%llu)", (unsigned long long)input->inode_bitmap, (unsigned long long)input->inode_table, itend - 1); else if (inside(input->block_bitmap, start, metaend)) - ext4_warning(sb, __func__, - "Block bitmap (%llu) in GDT table" - " (%llu-%llu)", + ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)", (unsigned long long)input->block_bitmap, start, metaend - 1); else if (inside(input->inode_bitmap, start, metaend)) - ext4_warning(sb, __func__, - "Inode bitmap (%llu) in GDT table" - " (%llu-%llu)", + ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)", (unsigned long long)input->inode_bitmap, start, metaend - 1); else if (inside(input->inode_table, start, metaend) || inside(itend - 1, start, metaend)) - ext4_warning(sb, __func__, - "Inode table (%llu-%llu) overlaps" - "GDT table (%llu-%llu)", + ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table " + "(%llu-%llu)", (unsigned long long)input->inode_table, itend - 1, start, metaend - 1); else @@ -364,8 +353,7 @@ static int verify_reserved_gdb(struct super_block *sb, while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { if (le32_to_cpu(*p++) != grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ - ext4_warning(sb, __func__, - "reserved GDT %llu" + ext4_warning(sb, "reserved GDT %llu" " missing grp %d (%llu)", blk, grp, grp * @@ -420,8 +408,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, */ if (EXT4_SB(sb)->s_sbh->b_blocknr != le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { - ext4_warning(sb, __func__, - "won't resize using backup superblock at %llu", + ext4_warning(sb, "won't resize using backup superblock at %llu", (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); return -EPERM; } @@ -444,8 +431,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, data = (__le32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { - ext4_warning(sb, __func__, - "new group %u GDT block %llu not reserved", + ext4_warning(sb, "new group %u GDT block %llu not reserved", input->group, gdblock); err = -EINVAL; goto exit_dind; @@ -468,7 +454,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, GFP_NOFS); if (!n_group_desc) { err = -ENOMEM; - ext4_warning(sb, __func__, + ext4_warning(sb, "not enough memory for %lu groups", gdb_num + 1); goto exit_inode; } @@ -567,8 +553,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, /* Get each reserved primary GDT block and verify it holds backups */ for (res = 0; res < reserved_gdb; res++, blk++) { if (le32_to_cpu(*data) != blk) { - ext4_warning(sb, __func__, - "reserved block %llu" + ext4_warning(sb, "reserved block %llu" " not at offset %ld", blk, (long)(data - (__le32 *)dind->b_data)); @@ -713,8 +698,7 @@ static void update_backups(struct super_block *sb, */ exit_err: if (err) { - ext4_warning(sb, __func__, - "can't update backup for group %u (err %d), " + ext4_warning(sb, "can't update backup for group %u (err %d), " "forcing fsck on next reboot", group, err); sbi->s_mount_state &= ~EXT4_VALID_FS; sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); @@ -753,20 +737,19 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { - ext4_warning(sb, __func__, - "Can't resize non-sparse filesystem further"); + ext4_warning(sb, "Can't resize non-sparse filesystem further"); return -EPERM; } if (ext4_blocks_count(es) + input->blocks_count < ext4_blocks_count(es)) { - ext4_warning(sb, __func__, "blocks_count overflow"); + ext4_warning(sb, "blocks_count overflow"); return -EINVAL; } if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < le32_to_cpu(es->s_inodes_count)) { - ext4_warning(sb, __func__, "inodes_count overflow"); + ext4_warning(sb, "inodes_count overflow"); return -EINVAL; } @@ -774,14 +757,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) || !le16_to_cpu(es->s_reserved_gdt_blocks)) { - ext4_warning(sb, __func__, + ext4_warning(sb, "No reserved GDT blocks, can't resize"); return -EPERM; } inode = ext4_iget(sb, EXT4_RESIZE_INO); if (IS_ERR(inode)) { - ext4_warning(sb, __func__, - "Error opening resize inode"); + ext4_warning(sb, "Error opening resize inode"); return PTR_ERR(inode); } } @@ -810,8 +792,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) mutex_lock(&sbi->s_resize_lock); if (input->group != sbi->s_groups_count) { - ext4_warning(sb, __func__, - "multiple resizers run on filesystem!"); + ext4_warning(sb, "multiple resizers run on filesystem!"); err = -EBUSY; goto exit_journal; } @@ -997,13 +978,12 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, " too large to resize to %llu blocks safely\n", sb->s_id, n_blocks_count); if (sizeof(sector_t) < 8) - ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled"); + ext4_warning(sb, "CONFIG_LBDAF not enabled"); return -EINVAL; } if (n_blocks_count < o_blocks_count) { - ext4_warning(sb, __func__, - "can't shrink FS - resize aborted"); + ext4_warning(sb, "can't shrink FS - resize aborted"); return -EBUSY; } @@ -1011,15 +991,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); if (last == 0) { - ext4_warning(sb, __func__, - "need to use ext2online to resize further"); + ext4_warning(sb, "need to use ext2online to resize further"); return -EPERM; } add = EXT4_BLOCKS_PER_GROUP(sb) - last; if (o_blocks_count + add < o_blocks_count) { - ext4_warning(sb, __func__, "blocks_count overflow"); + ext4_warning(sb, "blocks_count overflow"); return -EINVAL; } @@ -1027,16 +1006,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, add = n_blocks_count - o_blocks_count; if (o_blocks_count + add < n_blocks_count) - ext4_warning(sb, __func__, - "will only finish group (%llu" - " blocks, %u new)", + ext4_warning(sb, "will only finish group (%llu blocks, %u new)", o_blocks_count + add, add); /* See if the device is actually as big as what was requested */ bh = sb_bread(sb, o_blocks_count + add - 1); if (!bh) { - ext4_warning(sb, __func__, - "can't read last block, resize aborted"); + ext4_warning(sb, "can't read last block, resize aborted"); return -ENOSPC; } brelse(bh); @@ -1047,14 +1023,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, handle = ext4_journal_start_sb(sb, 3); if (IS_ERR(handle)) { err = PTR_ERR(handle); - ext4_warning(sb, __func__, "error %d on journal start", err); + ext4_warning(sb, "error %d on journal start", err); goto exit_put; } mutex_lock(&EXT4_SB(sb)->s_resize_lock); if (o_blocks_count != ext4_blocks_count(es)) { - ext4_warning(sb, __func__, - "multiple resizers run on filesystem!"); + ext4_warning(sb, "multiple resizers run on filesystem!"); mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_journal_stop(handle); err = -EBUSY; @@ -1063,8 +1038,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh))) { - ext4_warning(sb, __func__, - "error %d on journal write access", err); + ext4_warning(sb, "error %d on journal write access", err); mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_journal_stop(handle); goto exit_put; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 735c20d5fd5..2b83b96cb2e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -333,7 +333,7 @@ static void ext4_handle_error(struct super_block *sb) sb->s_id); } -void ext4_error(struct super_block *sb, const char *function, +void __ext4_error(struct super_block *sb, const char *function, const char *fmt, ...) { va_list args; @@ -347,6 +347,42 @@ void ext4_error(struct super_block *sb, const char *function, ext4_handle_error(sb); } +void ext4_error_inode(const char *function, struct inode *inode, + const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ", + inode->i_sb->s_id, function, inode->i_ino, current->comm); + vprintk(fmt, args); + printk("\n"); + va_end(args); + + ext4_handle_error(inode->i_sb); +} + +void ext4_error_file(const char *function, struct file *file, + const char *fmt, ...) +{ + va_list args; + struct inode *inode = file->f_dentry->d_inode; + char pathname[80], *path; + + va_start(args, fmt); + path = d_path(&(file->f_path), pathname, sizeof(pathname)); + if (!path) + path = "(unknown)"; + printk(KERN_CRIT + "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ", + inode->i_sb->s_id, function, inode->i_ino, current->comm, path); + vprintk(fmt, args); + printk("\n"); + va_end(args); + + ext4_handle_error(inode->i_sb); +} + static const char *ext4_decode_error(struct super_block *sb, int errno, char nbuf[16]) { @@ -450,7 +486,7 @@ void ext4_msg (struct super_block * sb, const char *prefix, va_end(args); } -void ext4_warning(struct super_block *sb, const char *function, +void __ext4_warning(struct super_block *sb, const char *function, const char *fmt, ...) { va_list args; @@ -507,7 +543,7 @@ void ext4_update_dynamic_rev(struct super_block *sb) if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) return; - ext4_warning(sb, __func__, + ext4_warning(sb, "updating to rev %d because of new feature flag, " "running e2fsck is recommended", EXT4_DYNAMIC_REV); @@ -708,7 +744,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; #endif - INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); + INIT_LIST_HEAD(&ei->i_completed_io_list); + spin_lock_init(&ei->i_completed_io_lock); ei->cur_aio_dio = NULL; ei->i_sync_tid = 0; ei->i_datasync_tid = 0; @@ -761,6 +798,7 @@ static void destroy_inodecache(void) static void ext4_clear_inode(struct inode *inode) { + dquot_drop(inode); ext4_discard_preallocations(inode); if (EXT4_JOURNAL(inode)) jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, @@ -796,10 +834,10 @@ static inline void ext4_show_quota_options(struct seq_file *seq, if (sbi->s_qf_names[GRPQUOTA]) seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); - if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) + if (test_opt(sb, USRQUOTA)) seq_puts(seq, ",usrquota"); - if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) + if (test_opt(sb, GRPQUOTA)) seq_puts(seq, ",grpquota"); #endif } @@ -926,6 +964,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) if (test_opt(sb, NOLOAD)) seq_puts(seq, ",norecovery"); + if (test_opt(sb, DIOREAD_NOLOCK)) + seq_puts(seq, ",dioread_nolock"); + ext4_show_quota_options(seq, sb); return 0; @@ -1012,19 +1053,9 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); static const struct dquot_operations ext4_quota_operations = { - .initialize = dquot_initialize, - .drop = dquot_drop, - .alloc_space = dquot_alloc_space, - .reserve_space = dquot_reserve_space, - .claim_space = dquot_claim_space, - .release_rsv = dquot_release_reserved_space, #ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, #endif - .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, - .free_inode = dquot_free_inode, - .transfer = dquot_transfer, .write_dquot = ext4_write_dquot, .acquire_dquot = ext4_acquire_dquot, .release_dquot = ext4_release_dquot, @@ -1109,6 +1140,7 @@ enum { Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio, + Opt_dioread_nolock, Opt_dioread_lock, Opt_discard, Opt_nodiscard, }; @@ -1176,6 +1208,8 @@ static const match_table_t tokens = { {Opt_auto_da_alloc, "auto_da_alloc=%u"}, {Opt_auto_da_alloc, "auto_da_alloc"}, {Opt_noauto_da_alloc, "noauto_da_alloc"}, + {Opt_dioread_nolock, "dioread_nolock"}, + {Opt_dioread_lock, "dioread_lock"}, {Opt_discard, "discard"}, {Opt_nodiscard, "nodiscard"}, {Opt_err, NULL}, @@ -1205,6 +1239,66 @@ static ext4_fsblk_t get_sb_block(void **data) } #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) +static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" + "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; + +#ifdef CONFIG_QUOTA +static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + char *qname; + + if (sb_any_quota_loaded(sb) && + !sbi->s_qf_names[qtype]) { + ext4_msg(sb, KERN_ERR, + "Cannot change journaled " + "quota options when quota turned on"); + return 0; + } + qname = match_strdup(args); + if (!qname) { + ext4_msg(sb, KERN_ERR, + "Not enough memory for storing quotafile name"); + return 0; + } + if (sbi->s_qf_names[qtype] && + strcmp(sbi->s_qf_names[qtype], qname)) { + ext4_msg(sb, KERN_ERR, + "%s quota file already specified", QTYPE2NAME(qtype)); + kfree(qname); + return 0; + } + sbi->s_qf_names[qtype] = qname; + if (strchr(sbi->s_qf_names[qtype], '/')) { + ext4_msg(sb, KERN_ERR, + "quotafile must be on filesystem root"); + kfree(sbi->s_qf_names[qtype]); + sbi->s_qf_names[qtype] = NULL; + return 0; + } + set_opt(sbi->s_mount_opt, QUOTA); + return 1; +} + +static int clear_qf_name(struct super_block *sb, int qtype) +{ + + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (sb_any_quota_loaded(sb) && + sbi->s_qf_names[qtype]) { + ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" + " when quota turned on"); + return 0; + } + /* + * The space will be released later when all options are confirmed + * to be correct + */ + sbi->s_qf_names[qtype] = NULL; + return 1; +} +#endif static int parse_options(char *options, struct super_block *sb, unsigned long *journal_devnum, @@ -1217,8 +1311,7 @@ static int parse_options(char *options, struct super_block *sb, int data_opt = 0; int option; #ifdef CONFIG_QUOTA - int qtype, qfmt; - char *qname; + int qfmt; #endif if (!options) @@ -1229,19 +1322,31 @@ static int parse_options(char *options, struct super_block *sb, if (!*p) continue; + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. + */ + args[0].to = args[0].from = 0; token = match_token(p, tokens, args); switch (token) { case Opt_bsd_df: + ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); clear_opt(sbi->s_mount_opt, MINIX_DF); break; case Opt_minix_df: + ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); set_opt(sbi->s_mount_opt, MINIX_DF); + break; case Opt_grpid: + ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); set_opt(sbi->s_mount_opt, GRPID); + break; case Opt_nogrpid: + ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); clear_opt(sbi->s_mount_opt, GRPID); + break; case Opt_resuid: if (match_int(&args[0], &option)) @@ -1378,14 +1483,13 @@ static int parse_options(char *options, struct super_block *sb, data_opt = EXT4_MOUNT_WRITEBACK_DATA; datacheck: if (is_remount) { - if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) - != data_opt) { + if (test_opt(sb, DATA_FLAGS) != data_opt) { ext4_msg(sb, KERN_ERR, "Cannot change data mode on remount"); return 0; } } else { - sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; + clear_opt(sbi->s_mount_opt, DATA_FLAGS); sbi->s_mount_opt |= data_opt; } break; @@ -1397,63 +1501,22 @@ static int parse_options(char *options, struct super_block *sb, break; #ifdef CONFIG_QUOTA case Opt_usrjquota: - qtype = USRQUOTA; - goto set_qf_name; - case Opt_grpjquota: - qtype = GRPQUOTA; -set_qf_name: - if (sb_any_quota_loaded(sb) && - !sbi->s_qf_names[qtype]) { - ext4_msg(sb, KERN_ERR, - "Cannot change journaled " - "quota options when quota turned on"); + if (!set_qf_name(sb, USRQUOTA, &args[0])) return 0; - } - qname = match_strdup(&args[0]); - if (!qname) { - ext4_msg(sb, KERN_ERR, - "Not enough memory for " - "storing quotafile name"); - return 0; - } - if (sbi->s_qf_names[qtype] && - strcmp(sbi->s_qf_names[qtype], qname)) { - ext4_msg(sb, KERN_ERR, - "%s quota file already " - "specified", QTYPE2NAME(qtype)); - kfree(qname); - return 0; - } - sbi->s_qf_names[qtype] = qname; - if (strchr(sbi->s_qf_names[qtype], '/')) { - ext4_msg(sb, KERN_ERR, - "quotafile must be on " - "filesystem root"); - kfree(sbi->s_qf_names[qtype]); - sbi->s_qf_names[qtype] = NULL; + break; + case Opt_grpjquota: + if (!set_qf_name(sb, GRPQUOTA, &args[0])) return 0; - } - set_opt(sbi->s_mount_opt, QUOTA); break; case Opt_offusrjquota: - qtype = USRQUOTA; - goto clear_qf_name; + if (!clear_qf_name(sb, USRQUOTA)) + return 0; + break; case Opt_offgrpjquota: - qtype = GRPQUOTA; -clear_qf_name: - if (sb_any_quota_loaded(sb) && - sbi->s_qf_names[qtype]) { - ext4_msg(sb, KERN_ERR, "Cannot change " - "journaled quota options when " - "quota turned on"); + if (!clear_qf_name(sb, GRPQUOTA)) return 0; - } - /* - * The space will be released later when all options - * are confirmed to be correct - */ - sbi->s_qf_names[qtype] = NULL; break; + case Opt_jqfmt_vfsold: qfmt = QFMT_VFS_OLD; goto set_qf_format; @@ -1518,10 +1581,11 @@ set_qf_format: clear_opt(sbi->s_mount_opt, BARRIER); break; case Opt_barrier: - if (match_int(&args[0], &option)) { - set_opt(sbi->s_mount_opt, BARRIER); - break; - } + if (args[0].from) { + if (match_int(&args[0], &option)) + return 0; + } else + option = 1; /* No argument, default to 1 */ if (option) set_opt(sbi->s_mount_opt, BARRIER); else @@ -1594,10 +1658,11 @@ set_qf_format: set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); break; case Opt_auto_da_alloc: - if (match_int(&args[0], &option)) { - clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); - break; - } + if (args[0].from) { + if (match_int(&args[0], &option)) + return 0; + } else + option = 1; /* No argument, default to 1 */ if (option) clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); else @@ -1609,6 +1674,12 @@ set_qf_format: case Opt_nodiscard: clear_opt(sbi->s_mount_opt, DISCARD); break; + case Opt_dioread_nolock: + set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); + break; + case Opt_dioread_lock: + clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); + break; default: ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " @@ -1618,18 +1689,13 @@ set_qf_format: } #ifdef CONFIG_QUOTA if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { - if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && - sbi->s_qf_names[USRQUOTA]) + if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) clear_opt(sbi->s_mount_opt, USRQUOTA); - if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && - sbi->s_qf_names[GRPQUOTA]) + if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) clear_opt(sbi->s_mount_opt, GRPQUOTA); - if ((sbi->s_qf_names[USRQUOTA] && - (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || - (sbi->s_qf_names[GRPQUOTA] && - (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { + if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { ext4_msg(sb, KERN_ERR, "old and new quota " "format mixing"); return 0; @@ -1939,7 +2005,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, } list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); - vfs_dq_init(inode); + dquot_initialize(inode); if (inode->i_nlink) { ext4_msg(sb, KERN_DEBUG, "%s: truncating inode %lu to %lld bytes", @@ -2432,8 +2498,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) def_mount_opts = le32_to_cpu(es->s_default_mount_opts); if (def_mount_opts & EXT4_DEFM_DEBUG) set_opt(sbi->s_mount_opt, DEBUG); - if (def_mount_opts & EXT4_DEFM_BSDGROUPS) + if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { + ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", + "2.6.38"); set_opt(sbi->s_mount_opt, GRPID); + } if (def_mount_opts & EXT4_DEFM_UID16) set_opt(sbi->s_mount_opt, NO_UID32); #ifdef CONFIG_EXT4_FS_XATTR @@ -2445,11 +2514,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, POSIX_ACL); #endif if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) - sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; + set_opt(sbi->s_mount_opt, JOURNAL_DATA); else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) - sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; + set_opt(sbi->s_mount_opt, ORDERED_DATA); else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) - sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; + set_opt(sbi->s_mount_opt, WRITEBACK_DATA); if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) set_opt(sbi->s_mount_opt, ERRORS_PANIC); @@ -2477,7 +2546,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || @@ -2766,7 +2835,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { ext4_msg(sb, KERN_ERR, "required journal recovery " "suppressed and not mounted read-only"); - goto failed_mount4; + goto failed_mount_wq; } else { clear_opt(sbi->s_mount_opt, DATA_FLAGS); set_opt(sbi->s_mount_opt, WRITEBACK_DATA); @@ -2779,7 +2848,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_64BIT)) { ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); - goto failed_mount4; + goto failed_mount_wq; } if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { @@ -2818,7 +2887,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { ext4_msg(sb, KERN_ERR, "Journal does not support " "requested data journaling mode"); - goto failed_mount4; + goto failed_mount_wq; } default: break; @@ -2826,13 +2895,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); no_journal: - if (test_opt(sb, NOBH)) { if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " "its supported only with writeback mode"); clear_opt(sbi->s_mount_opt, NOBH); } + if (test_opt(sb, DIOREAD_NOLOCK)) { + ext4_msg(sb, KERN_WARNING, "dioread_nolock option is " + "not supported with nobh mode"); + goto failed_mount_wq; + } } EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); if (!EXT4_SB(sb)->dio_unwritten_wq) { @@ -2897,6 +2970,18 @@ no_journal: "requested data journaling mode"); clear_opt(sbi->s_mount_opt, DELALLOC); } + if (test_opt(sb, DIOREAD_NOLOCK)) { + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { + ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " + "option - requested data journaling mode"); + clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); + } + if (sb->s_blocksize < PAGE_SIZE) { + ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " + "option - block size is too small"); + clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); + } + } err = ext4_setup_system_zone(sb); if (err) { @@ -3360,10 +3445,9 @@ static void ext4_clear_journal_err(struct super_block *sb, char nbuf[16]; errstr = ext4_decode_error(sb, j_errno, nbuf); - ext4_warning(sb, __func__, "Filesystem error recorded " + ext4_warning(sb, "Filesystem error recorded " "from previous mount: %s", errstr); - ext4_warning(sb, __func__, "Marking fs in need of " - "filesystem check."); + ext4_warning(sb, "Marking fs in need of filesystem check."); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); @@ -3514,7 +3598,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ext4_abort(sb, __func__, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); es = sbi->s_es; @@ -3708,7 +3792,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) * Process 1 Process 2 * ext4_create() quota_sync() * jbd2_journal_start() write_dquot() - * vfs_dq_init() down(dqio_mutex) + * dquot_initialize() down(dqio_mutex) * down(dqio_mutex) jbd2_journal_start() * */ @@ -3917,9 +4001,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); - int tocopy; int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; - size_t towrite = len; struct buffer_head *bh; handle_t *handle = journal_current_handle(); @@ -3929,52 +4011,53 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, (unsigned long long)off, (unsigned long long)len); return -EIO; } + /* + * Since we account only one data block in transaction credits, + * then it is impossible to cross a block boundary. + */ + if (sb->s_blocksize - offset < len) { + ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" + " cancelled because not block aligned", + (unsigned long long)off, (unsigned long long)len); + return -EIO; + } + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); - while (towrite > 0) { - tocopy = sb->s_blocksize - offset < towrite ? - sb->s_blocksize - offset : towrite; - bh = ext4_bread(handle, inode, blk, 1, &err); - if (!bh) + bh = ext4_bread(handle, inode, blk, 1, &err); + if (!bh) + goto out; + if (journal_quota) { + err = ext4_journal_get_write_access(handle, bh); + if (err) { + brelse(bh); goto out; - if (journal_quota) { - err = ext4_journal_get_write_access(handle, bh); - if (err) { - brelse(bh); - goto out; - } } - lock_buffer(bh); - memcpy(bh->b_data+offset, data, tocopy); - flush_dcache_page(bh->b_page); - unlock_buffer(bh); - if (journal_quota) - err = ext4_handle_dirty_metadata(handle, NULL, bh); - else { - /* Always do at least ordered writes for quotas */ - err = ext4_jbd2_file_inode(handle, inode); - mark_buffer_dirty(bh); - } - brelse(bh); - if (err) - goto out; - offset = 0; - towrite -= tocopy; - data += tocopy; - blk++; } + lock_buffer(bh); + memcpy(bh->b_data+offset, data, len); + flush_dcache_page(bh->b_page); + unlock_buffer(bh); + if (journal_quota) + err = ext4_handle_dirty_metadata(handle, NULL, bh); + else { + /* Always do at least ordered writes for quotas */ + err = ext4_jbd2_file_inode(handle, inode); + mark_buffer_dirty(bh); + } + brelse(bh); out: - if (len == towrite) { + if (err) { mutex_unlock(&inode->i_mutex); return err; } - if (inode->i_size < off+len-towrite) { - i_size_write(inode, off+len-towrite); + if (inode->i_size < off + len) { + i_size_write(inode, off + len); EXT4_I(inode)->i_disksize = inode->i_size; } inode->i_mtime = inode->i_ctime = CURRENT_TIME; ext4_mark_inode_dirty(handle, inode); mutex_unlock(&inode->i_mutex); - return len - towrite; + return len; } #endif diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index f3a2f7ed45a..b4c5aa8489d 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -227,7 +227,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { -bad_block: ext4_error(inode->i_sb, __func__, +bad_block: + ext4_error(inode->i_sb, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -267,7 +268,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, void *end; int error; - if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) + if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) return -ENODATA; error = ext4_get_inode_loc(inode, &iloc); if (error) @@ -371,7 +372,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -396,7 +397,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) void *end; int error; - if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) + if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) return 0; error = ext4_get_inode_loc(inode, &iloc); if (error) @@ -494,7 +495,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, error = ext4_handle_dirty_metadata(handle, inode, bh); if (IS_SYNC(inode)) ext4_handle_sync(handle); - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); ea_bdebug(bh, "refcount now=%d; releasing", le32_to_cpu(BHDR(bh)->h_refcount)); if (ce) @@ -665,9 +666,8 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, atomic_read(&(bs->bh->b_count)), le32_to_cpu(BHDR(bs->bh)->h_refcount)); if (ext4_xattr_check_block(bs->bh)) { - ext4_error(sb, __func__, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); + ext4_error(sb, "inode %lu: bad block %llu", + inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; } @@ -787,8 +787,8 @@ inserted: else { /* The old block is released after updating the inode. */ - error = -EDQUOT; - if (vfs_dq_alloc_block(inode, 1)) + error = dquot_alloc_block(inode, 1); + if (error) goto cleanup; error = ext4_journal_get_write_access(handle, new_bh); @@ -876,13 +876,12 @@ cleanup: return error; cleanup_dquot: - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); goto cleanup; bad_block: - ext4_error(inode->i_sb, __func__, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); + ext4_error(inode->i_sb, "inode %lu: bad block %llu", + inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; #undef header @@ -908,7 +907,7 @@ ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, is->s.base = is->s.first = IFIRST(header); is->s.here = is->s.first; is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { + if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { error = ext4_xattr_check_names(IFIRST(header), is->s.end); if (error) return error; @@ -940,10 +939,10 @@ ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, header = IHDR(inode, ext4_raw_inode(&is->iloc)); if (!IS_LAST_ENTRY(s->first)) { header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); - EXT4_I(inode)->i_state |= EXT4_STATE_XATTR; + ext4_set_inode_state(inode, EXT4_STATE_XATTR); } else { header->h_magic = cpu_to_le32(0); - EXT4_I(inode)->i_state &= ~EXT4_STATE_XATTR; + ext4_clear_inode_state(inode, EXT4_STATE_XATTR); } return 0; } @@ -986,8 +985,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (strlen(name) > 255) return -ERANGE; down_write(&EXT4_I(inode)->xattr_sem); - no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; - EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; + no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); error = ext4_get_inode_loc(inode, &is.iloc); if (error) @@ -997,10 +996,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (error) goto cleanup; - if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { + if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) { struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); - EXT4_I(inode)->i_state &= ~EXT4_STATE_NEW; + ext4_clear_inode_state(inode, EXT4_STATE_NEW); } error = ext4_xattr_ibody_find(inode, &i, &is); @@ -1052,7 +1051,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, ext4_xattr_update_super_block(handle, inode->i_sb); inode->i_ctime = ext4_current_time(inode); if (!value) - EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* * The bh is consumed by ext4_mark_iloc_dirty, even with @@ -1067,7 +1066,7 @@ cleanup: brelse(is.iloc.bh); brelse(bs.bh); if (no_expand == 0) - EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); up_write(&EXT4_I(inode)->xattr_sem); return error; } @@ -1195,9 +1194,8 @@ retry: if (!bh) goto cleanup; if (ext4_xattr_check_block(bh)) { - ext4_error(inode->i_sb, __func__, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); + ext4_error(inode->i_sb, "inode %lu: bad block %llu", + inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; } @@ -1302,6 +1300,8 @@ retry: /* Remove the chosen entry from the inode */ error = ext4_xattr_ibody_set(handle, inode, &i, is); + if (error) + goto cleanup; entry = IFIRST(header); if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize) @@ -1372,16 +1372,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) goto cleanup; bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); if (!bh) { - ext4_error(inode->i_sb, __func__, - "inode %lu: block %llu read error", inode->i_ino, - EXT4_I(inode)->i_file_acl); + ext4_error(inode->i_sb, "inode %lu: block %llu read error", + inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; } if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) { - ext4_error(inode->i_sb, __func__, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); + ext4_error(inode->i_sb, "inode %lu: bad block %llu", + inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; } ext4_xattr_release_block(handle, inode, bh); @@ -1506,7 +1504,7 @@ again: } bh = sb_bread(inode->i_sb, ce->e_block); if (!bh) { - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, "inode %lu: block %lu read error", inode->i_ino, (unsigned long) ce->e_block); } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 14da530b05c..fbeecdc194d 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -577,7 +577,7 @@ static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, return i_pos; } -static int fat_write_inode(struct inode *inode, int wait) +static int __fat_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); @@ -634,9 +634,14 @@ retry: return err; } +static int fat_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __fat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int fat_sync_inode(struct inode *inode) { - return fat_write_inode(inode, 1); + return __fat_write_inode(inode, 1); } EXPORT_SYMBOL_GPL(fat_sync_inode); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1a7c42c64ff..76fc4d594ac 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -381,10 +381,10 @@ static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); } -static int write_inode(struct inode *inode, int sync) +static int write_inode(struct inode *inode, struct writeback_control *wbc) { if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) - return inode->i_sb->s_op->write_inode(inode, sync); + return inode->i_sb->s_op->write_inode(inode, wbc); return 0; } @@ -421,7 +421,6 @@ static int writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { struct address_space *mapping = inode->i_mapping; - int wait = wbc->sync_mode == WB_SYNC_ALL; unsigned dirty; int ret; @@ -439,7 +438,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * We'll have another go at writing back this inode when we * completed a full scan of b_io. */ - if (!wait) { + if (wbc->sync_mode != WB_SYNC_ALL) { requeue_io(inode); return 0; } @@ -461,15 +460,20 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ret = do_writepages(mapping, wbc); - /* Don't write the inode if only I_DIRTY_PAGES was set */ - if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { - int err = write_inode(inode, wait); + /* + * Make sure to wait on the data before writing out the metadata. + * This is important for filesystems that modify metadata on data + * I/O completion. + */ + if (wbc->sync_mode == WB_SYNC_ALL) { + int err = filemap_fdatawait(mapping); if (ret == 0) ret = err; } - if (wait) { - int err = filemap_fdatawait(mapping); + /* Don't write the inode if only I_DIRTY_PAGES was set */ + if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { + int err = write_inode(inode, wbc); if (ret == 0) ret = err; } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e3bf6eab875..6dbcbad6ab1 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1083,7 +1083,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, } } -int gfs2_quota_sync(struct super_block *sb, int type) +int gfs2_quota_sync(struct super_block *sb, int type, int wait) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_data **qda; @@ -1127,6 +1127,11 @@ int gfs2_quota_sync(struct super_block *sb, int type) return error; } +static int gfs2_quota_sync_timeo(struct super_block *sb, int type) +{ + return gfs2_quota_sync(sb, type, 0); +} + int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) { struct gfs2_quota_data *qd; @@ -1382,7 +1387,7 @@ int gfs2_quotad(void *data) &tune->gt_statfs_quantum); /* Update quota file */ - quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, + quotad_check_timeo(sdp, "sync", gfs2_quota_sync_timeo, t, "ad_timeo, &tune->gt_quota_quantum); /* Check for & recover partially truncated inodes */ diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index e271fa07ad0..195f60c8bd1 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -25,7 +25,7 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid); extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 uid, u32 gid); -extern int gfs2_quota_sync(struct super_block *sb, int type); +extern int gfs2_quota_sync(struct super_block *sb, int type, int wait); extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); extern int gfs2_quota_init(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e5e22629da6..50aac606b99 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -22,6 +22,7 @@ #include <linux/crc32.h> #include <linux/time.h> #include <linux/wait.h> +#include <linux/writeback.h> #include "gfs2.h" #include "incore.h" @@ -711,7 +712,7 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) * Returns: errno */ -static int gfs2_write_inode(struct inode *inode, int sync) +static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -745,7 +746,7 @@ static int gfs2_write_inode(struct inode *inode, int sync) do_unlock: gfs2_glock_dq_uninit(&gh); do_flush: - if (sync != 0) + if (wbc->sync_mode == WB_SYNC_ALL) gfs2_log_flush(GFS2_SB(inode), ip->i_gl); return ret; } @@ -763,7 +764,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) int error; flush_workqueue(gfs2_delete_workqueue); - gfs2_quota_sync(sdp->sd_vfs, 0); + gfs2_quota_sync(sdp->sd_vfs, 0, 1); gfs2_statfs_sync(sdp->sd_vfs, 0); error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index a0db1c94317..b5f1a46133c 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -167,7 +167,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; - gfs2_quota_sync(sdp->sd_vfs, 0); + gfs2_quota_sync(sdp->sd_vfs, 0, 1); return len; } diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 052387e1167..fe35e3b626c 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -188,7 +188,7 @@ extern const struct address_space_operations hfs_btree_aops; extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int); extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *); -extern int hfs_write_inode(struct inode *, int); +extern int hfs_write_inode(struct inode *, struct writeback_control *); extern int hfs_inode_setattr(struct dentry *, struct iattr *); extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, __be32 log_size, __be32 phys_size, u32 clump_size); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index a1cbff2b4d9..14f5cb1b9fd 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -381,7 +381,7 @@ void hfs_inode_write_fork(struct inode *inode, struct hfs_extent *ext, HFS_SB(inode->i_sb)->alloc_blksz); } -int hfs_write_inode(struct inode *inode, int unused) +int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct inode *main_inode = inode; struct hfs_find_data fd; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 43022f3d514..74b473a8ef9 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -87,7 +87,8 @@ bad_inode: return ERR_PTR(err); } -static int hfsplus_write_inode(struct inode *inode, int unused) +static int hfsplus_write_inode(struct inode *inode, + struct writeback_control *wbc) { struct hfsplus_vh *vhdr; int ret = 0; diff --git a/fs/inode.c b/fs/inode.c index 03dfeb2e392..407bf392e20 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -8,7 +8,6 @@ #include <linux/mm.h> #include <linux/dcache.h> #include <linux/init.h> -#include <linux/quotaops.h> #include <linux/slab.h> #include <linux/writeback.h> #include <linux/module.h> @@ -314,7 +313,6 @@ void clear_inode(struct inode *inode) BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); inode_sync_wait(inode); - vfs_dq_drop(inode); if (inode->i_sb->s_op->clear_inode) inode->i_sb->s_op->clear_inode(inode); if (S_ISBLK(inode->i_mode) && inode->i_bdev) @@ -1211,8 +1209,6 @@ void generic_delete_inode(struct inode *inode) if (op->delete_inode) { void (*delete)(struct inode *) = op->delete_inode; - if (!is_bad_inode(inode)) - vfs_dq_init(inode); /* Filesystems implementing their own * s_op->delete_inode are required to call * truncate_inode_pages and clear_inode() diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 4bd882548c4..2c90e3ef625 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -862,12 +862,12 @@ restart_loop: /* A buffer which has been freed while still being * journaled by a previous transaction may end up still * being dirty here, but we want to avoid writing back - * that buffer in the future now that the last use has - * been committed. That's not only a performance gain, - * it also stops aliasing problems if the buffer is left - * behind for writeback and gets reallocated for another + * that buffer in the future after the "add to orphan" + * operation been committed, That's not only a performance + * gain, it also stops aliasing problems if the buffer is + * left behind for writeback and gets reallocated for another * use in a different page. */ - if (buffer_freed(bh)) { + if (buffer_freed(bh) && !jh->b_next_transaction) { clear_buffer_freed(bh); clear_buffer_jbddirty(bh); } diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 006f9ad838a..99e9fea1107 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1864,6 +1864,21 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) if (!jh) goto zap_buffer_no_jh; + /* + * We cannot remove the buffer from checkpoint lists until the + * transaction adding inode to orphan list (let's call it T) + * is committed. Otherwise if the transaction changing the + * buffer would be cleaned from the journal before T is + * committed, a crash will cause that the correct contents of + * the buffer will be lost. On the other hand we have to + * clear the buffer dirty bit at latest at the moment when the + * transaction marking the buffer as freed in the filesystem + * structures is committed because from that moment on the + * buffer can be reallocated and used by a different page. + * Since the block hasn't been freed yet but the inode has + * already been added to orphan list, it is safe for us to add + * the buffer to BJ_Forget list of the newest transaction. + */ transaction = jh->b_transaction; if (transaction == NULL) { /* First case: not on any transaction. If it @@ -1929,16 +1944,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) goto zap_buffer; } /* - * If it is committing, we simply cannot touch it. We - * can remove it's next_transaction pointer from the - * running transaction if that is set, but nothing - * else. */ + * The buffer is committing, we simply cannot touch + * it. So we just set j_next_transaction to the + * running transaction (if there is one) and mark + * buffer as freed so that commit code knows it should + * clear dirty bits when it is done with the buffer. + */ set_buffer_freed(bh); - if (jh->b_next_transaction) { - J_ASSERT(jh->b_next_transaction == - journal->j_running_transaction); - jh->b_next_transaction = NULL; - } + if (journal->j_running_transaction && buffer_jbddirty(bh)) + jh->b_next_transaction = journal->j_running_transaction; journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); @@ -2120,7 +2134,7 @@ void journal_file_buffer(struct journal_head *jh, */ void __journal_refile_buffer(struct journal_head *jh) { - int was_dirty; + int was_dirty, jlist; struct buffer_head *bh = jh2bh(jh); J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); @@ -2142,8 +2156,13 @@ void __journal_refile_buffer(struct journal_head *jh) __journal_temp_unlink_buffer(jh); jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; - __journal_file_buffer(jh, jh->b_transaction, - jh->b_modified ? BJ_Metadata : BJ_Reserved); + if (buffer_freed(bh)) + jlist = BJ_Forget; + else if (jh->b_modified) + jlist = BJ_Metadata; + else + jlist = BJ_Reserved; + __journal_file_buffer(jh, jh->b_transaction, jlist); J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); if (was_dirty) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 88684937095..30beb11ef92 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -507,6 +507,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) if (blocknr < journal->j_tail) freed = freed + journal->j_last - journal->j_first; + trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed); jbd_debug(1, "Cleaning journal tail from %d to %d (offset %lu), " "freeing %lu\n", diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 1bc74b6f26d..671da7fb7ff 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -883,8 +883,7 @@ restart_loop: spin_unlock(&journal->j_list_lock); bh = jh2bh(jh); jbd_lock_bh_state(bh); - J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || - jh->b_transaction == journal->j_running_transaction); + J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); /* * If there is undo-protected committed data against @@ -930,12 +929,12 @@ restart_loop: /* A buffer which has been freed while still being * journaled by a previous transaction may end up still * being dirty here, but we want to avoid writing back - * that buffer in the future now that the last use has - * been committed. That's not only a performance gain, - * it also stops aliasing problems if the buffer is left - * behind for writeback and gets reallocated for another + * that buffer in the future after the "add to orphan" + * operation been committed, That's not only a performance + * gain, it also stops aliasing problems if the buffer is + * left behind for writeback and gets reallocated for another * use in a different page. */ - if (buffer_freed(bh)) { + if (buffer_freed(bh) && !jh->b_next_transaction) { clear_buffer_freed(bh); clear_buffer_jbddirty(bh); } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ac0d027595d..c03d4dce4d7 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -39,6 +39,8 @@ #include <linux/seq_file.h> #include <linux/math64.h> #include <linux/hash.h> +#include <linux/log2.h> +#include <linux/vmalloc.h> #define CREATE_TRACE_POINTS #include <trace/events/jbd2.h> @@ -93,6 +95,7 @@ EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); +static int jbd2_journal_create_slab(size_t slab_size); /* * Helper function used to manage commit timeouts @@ -1248,6 +1251,13 @@ int jbd2_journal_load(journal_t *journal) } } + /* + * Create a slab for this blocksize + */ + err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize)); + if (err) + return err; + /* Let the recovery code check whether it needs to recover any * data from the journal. */ if (jbd2_journal_recover(journal)) @@ -1807,6 +1817,127 @@ size_t journal_tag_bytes(journal_t *journal) } /* + * JBD memory management + * + * These functions are used to allocate block-sized chunks of memory + * used for making copies of buffer_head data. Very often it will be + * page-sized chunks of data, but sometimes it will be in + * sub-page-size chunks. (For example, 16k pages on Power systems + * with a 4k block file system.) For blocks smaller than a page, we + * use a SLAB allocator. There are slab caches for each block size, + * which are allocated at mount time, if necessary, and we only free + * (all of) the slab caches when/if the jbd2 module is unloaded. For + * this reason we don't need to a mutex to protect access to + * jbd2_slab[] allocating or releasing memory; only in + * jbd2_journal_create_slab(). + */ +#define JBD2_MAX_SLABS 8 +static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS]; +static DECLARE_MUTEX(jbd2_slab_create_sem); + +static const char *jbd2_slab_names[JBD2_MAX_SLABS] = { + "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k", + "jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k" +}; + + +static void jbd2_journal_destroy_slabs(void) +{ + int i; + + for (i = 0; i < JBD2_MAX_SLABS; i++) { + if (jbd2_slab[i]) + kmem_cache_destroy(jbd2_slab[i]); + jbd2_slab[i] = NULL; + } +} + +static int jbd2_journal_create_slab(size_t size) +{ + int i = order_base_2(size) - 10; + size_t slab_size; + + if (size == PAGE_SIZE) + return 0; + + if (i >= JBD2_MAX_SLABS) + return -EINVAL; + + if (unlikely(i < 0)) + i = 0; + down(&jbd2_slab_create_sem); + if (jbd2_slab[i]) { + up(&jbd2_slab_create_sem); + return 0; /* Already created */ + } + + slab_size = 1 << (i+10); + jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size, + slab_size, 0, NULL); + up(&jbd2_slab_create_sem); + if (!jbd2_slab[i]) { + printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n"); + return -ENOMEM; + } + return 0; +} + +static struct kmem_cache *get_slab(size_t size) +{ + int i = order_base_2(size) - 10; + + BUG_ON(i >= JBD2_MAX_SLABS); + if (unlikely(i < 0)) + i = 0; + BUG_ON(jbd2_slab[i] == 0); + return jbd2_slab[i]; +} + +void *jbd2_alloc(size_t size, gfp_t flags) +{ + void *ptr; + + BUG_ON(size & (size-1)); /* Must be a power of 2 */ + + flags |= __GFP_REPEAT; + if (size == PAGE_SIZE) + ptr = (void *)__get_free_pages(flags, 0); + else if (size > PAGE_SIZE) { + int order = get_order(size); + + if (order < 3) + ptr = (void *)__get_free_pages(flags, order); + else + ptr = vmalloc(size); + } else + ptr = kmem_cache_alloc(get_slab(size), flags); + + /* Check alignment; SLUB has gotten this wrong in the past, + * and this can lead to user data corruption! */ + BUG_ON(((unsigned long) ptr) & (size-1)); + + return ptr; +} + +void jbd2_free(void *ptr, size_t size) +{ + if (size == PAGE_SIZE) { + free_pages((unsigned long)ptr, 0); + return; + } + if (size > PAGE_SIZE) { + int order = get_order(size); + + if (order < 3) + free_pages((unsigned long)ptr, order); + else + vfree(ptr); + return; + } + kmem_cache_free(get_slab(size), ptr); +}; + +/* * Journal_head storage management */ static struct kmem_cache *jbd2_journal_head_cache; @@ -2204,6 +2335,7 @@ static void jbd2_journal_destroy_caches(void) jbd2_journal_destroy_revoke_caches(); jbd2_journal_destroy_jbd2_journal_head_cache(); jbd2_journal_destroy_handle_cache(); + jbd2_journal_destroy_slabs(); } static int __init journal_init(void) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index a0512700542..bfc70f57900 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1727,6 +1727,21 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) if (!jh) goto zap_buffer_no_jh; + /* + * We cannot remove the buffer from checkpoint lists until the + * transaction adding inode to orphan list (let's call it T) + * is committed. Otherwise if the transaction changing the + * buffer would be cleaned from the journal before T is + * committed, a crash will cause that the correct contents of + * the buffer will be lost. On the other hand we have to + * clear the buffer dirty bit at latest at the moment when the + * transaction marking the buffer as freed in the filesystem + * structures is committed because from that moment on the + * buffer can be reallocated and used by a different page. + * Since the block hasn't been freed yet but the inode has + * already been added to orphan list, it is safe for us to add + * the buffer to BJ_Forget list of the newest transaction. + */ transaction = jh->b_transaction; if (transaction == NULL) { /* First case: not on any transaction. If it @@ -1783,16 +1798,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } else if (transaction == journal->j_committing_transaction) { JBUFFER_TRACE(jh, "on committing transaction"); /* - * If it is committing, we simply cannot touch it. We - * can remove it's next_transaction pointer from the - * running transaction if that is set, but nothing - * else. */ + * The buffer is committing, we simply cannot touch + * it. So we just set j_next_transaction to the + * running transaction (if there is one) and mark + * buffer as freed so that commit code knows it should + * clear dirty bits when it is done with the buffer. + */ set_buffer_freed(bh); - if (jh->b_next_transaction) { - J_ASSERT(jh->b_next_transaction == - journal->j_running_transaction); - jh->b_next_transaction = NULL; - } + if (journal->j_running_transaction && buffer_jbddirty(bh)) + jh->b_next_transaction = journal->j_running_transaction; jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); @@ -1969,7 +1983,7 @@ void jbd2_journal_file_buffer(struct journal_head *jh, */ void __jbd2_journal_refile_buffer(struct journal_head *jh) { - int was_dirty; + int was_dirty, jlist; struct buffer_head *bh = jh2bh(jh); J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); @@ -1991,8 +2005,13 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) __jbd2_journal_temp_unlink_buffer(jh); jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; - __jbd2_journal_file_buffer(jh, jh->b_transaction, - jh->b_modified ? BJ_Metadata : BJ_Reserved); + if (buffer_freed(bh)) + jlist = BJ_Forget; + else if (jh->b_modified) + jlist = BJ_Metadata; + else + jlist = BJ_Reserved; + __jbd2_journal_file_buffer(jh, jh->b_transaction, jlist); J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); if (was_dirty) diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index d66477c3430..213169780b6 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -20,7 +20,6 @@ #include <linux/sched.h> #include <linux/fs.h> -#include <linux/quotaops.h> #include <linux/posix_acl_xattr.h> #include "jfs_incore.h" #include "jfs_txnmgr.h" @@ -174,7 +173,7 @@ cleanup: return rc; } -static int jfs_acl_chmod(struct inode *inode) +int jfs_acl_chmod(struct inode *inode) { struct posix_acl *acl, *clone; int rc; @@ -205,26 +204,3 @@ static int jfs_acl_chmod(struct inode *inode) posix_acl_release(clone); return rc; } - -int jfs_setattr(struct dentry *dentry, struct iattr *iattr) -{ - struct inode *inode = dentry->d_inode; - int rc; - - rc = inode_change_ok(inode, iattr); - if (rc) - return rc; - - if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { - if (vfs_dq_transfer(inode, iattr)) - return -EDQUOT; - } - - rc = inode_setattr(inode, iattr); - - if (!rc && (iattr->ia_valid & ATTR_MODE)) - rc = jfs_acl_chmod(inode); - - return rc; -} diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 2b70fa78e4a..14ba982b3f2 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -18,6 +18,7 @@ */ #include <linux/fs.h> +#include <linux/quotaops.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_dmap.h" @@ -47,7 +48,7 @@ static int jfs_open(struct inode *inode, struct file *file) { int rc; - if ((rc = generic_file_open(inode, file))) + if ((rc = dquot_file_open(inode, file))) return rc; /* @@ -88,14 +89,40 @@ static int jfs_release(struct inode *inode, struct file *file) return 0; } +int jfs_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct inode *inode = dentry->d_inode; + int rc; + + rc = inode_change_ok(inode, iattr); + if (rc) + return rc; + + if (iattr->ia_valid & ATTR_SIZE) + dquot_initialize(inode); + if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || + (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + rc = dquot_transfer(inode, iattr); + if (rc) + return rc; + } + + rc = inode_setattr(inode, iattr); + + if (!rc && (iattr->ia_valid & ATTR_MODE)) + rc = jfs_acl_chmod(inode); + + return rc; +} + const struct inode_operations jfs_file_inode_operations = { .truncate = jfs_truncate, .setxattr = jfs_setxattr, .getxattr = jfs_getxattr, .listxattr = jfs_listxattr, .removexattr = jfs_removexattr, -#ifdef CONFIG_JFS_POSIX_ACL .setattr = jfs_setattr, +#ifdef CONFIG_JFS_POSIX_ACL .check_acl = jfs_check_acl, #endif }; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index b2ae190a77b..9dd126276c9 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -22,6 +22,7 @@ #include <linux/buffer_head.h> #include <linux/pagemap.h> #include <linux/quotaops.h> +#include <linux/writeback.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -120,8 +121,10 @@ int jfs_commit_inode(struct inode *inode, int wait) return rc; } -int jfs_write_inode(struct inode *inode, int wait) +int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) { + int wait = wbc->sync_mode == WB_SYNC_ALL; + if (test_cflag(COMMIT_Nolink, inode)) return 0; /* @@ -146,6 +149,9 @@ void jfs_delete_inode(struct inode *inode) { jfs_info("In jfs_delete_inode, inode = 0x%p", inode); + if (!is_bad_inode(inode)) + dquot_initialize(inode); + if (!is_bad_inode(inode) && (JFS_IP(inode)->fileset == FILESYSTEM_I)) { truncate_inode_pages(&inode->i_data, 0); @@ -158,9 +164,9 @@ void jfs_delete_inode(struct inode *inode) /* * Free the inode from the quota allocation. */ - vfs_dq_init(inode); - vfs_dq_free_inode(inode); - vfs_dq_drop(inode); + dquot_initialize(inode); + dquot_free_inode(inode); + dquot_drop(inode); } clear_inode(inode); diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index b07bd417ef8..54e07559878 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -22,7 +22,7 @@ int jfs_check_acl(struct inode *, int); int jfs_init_acl(tid_t, struct inode *, struct inode *); -int jfs_setattr(struct dentry *, struct iattr *); +int jfs_acl_chmod(struct inode *inode); #else @@ -32,5 +32,10 @@ static inline int jfs_init_acl(tid_t tid, struct inode *inode, return 0; } +static inline int jfs_acl_chmod(struct inode *inode) +{ + return 0; +} + #endif #endif /* _H_JFS_ACL */ diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 925871e9887..0e4623be70c 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -381,10 +381,10 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) * It's time to move the inline table to an external * page and begin to build the xtree */ - if (vfs_dq_alloc_block(ip, sbi->nbperpage)) + if (dquot_alloc_block(ip, sbi->nbperpage)) goto clean_up; if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { - vfs_dq_free_block(ip, sbi->nbperpage); + dquot_free_block(ip, sbi->nbperpage); goto clean_up; } @@ -408,7 +408,7 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) memcpy(&jfs_ip->i_dirtable, temp_table, sizeof (temp_table)); dbFree(ip, xaddr, sbi->nbperpage); - vfs_dq_free_block(ip, sbi->nbperpage); + dquot_free_block(ip, sbi->nbperpage); goto clean_up; } ip->i_size = PSIZE; @@ -1027,10 +1027,9 @@ static int dtSplitUp(tid_t tid, n = xlen; /* Allocate blocks to quota. */ - if (vfs_dq_alloc_block(ip, n)) { - rc = -EDQUOT; + rc = dquot_alloc_block(ip, n); + if (rc) goto extendOut; - } quota_allocation += n; if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, @@ -1308,7 +1307,7 @@ static int dtSplitUp(tid_t tid, /* Rollback quota allocation */ if (rc && quota_allocation) - vfs_dq_free_block(ip, quota_allocation); + dquot_free_block(ip, quota_allocation); dtSplitUp_Exit: @@ -1369,9 +1368,10 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, return -EIO; /* Allocate blocks to quota. */ - if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { + rc = dquot_alloc_block(ip, lengthPXD(pxd)); + if (rc) { release_metapage(rmp); - return -EDQUOT; + return rc; } jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); @@ -1892,6 +1892,7 @@ static int dtSplitRoot(tid_t tid, struct dt_lock *dtlck; struct tlock *tlck; struct lv *lv; + int rc; /* get split root page */ smp = split->mp; @@ -1916,9 +1917,10 @@ static int dtSplitRoot(tid_t tid, rp = rmp->data; /* Allocate blocks to quota. */ - if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { + rc = dquot_alloc_block(ip, lengthPXD(pxd)); + if (rc) { release_metapage(rmp); - return -EDQUOT; + return rc; } BT_MARK_DIRTY(rmp, ip); @@ -2287,7 +2289,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, xlen = lengthPXD(&fp->header.self); /* Free quota allocation. */ - vfs_dq_free_block(ip, xlen); + dquot_free_block(ip, xlen); /* free/invalidate its buffer page */ discard_metapage(fmp); @@ -2363,7 +2365,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, xlen = lengthPXD(&p->header.self); /* Free quota allocation */ - vfs_dq_free_block(ip, xlen); + dquot_free_block(ip, xlen); /* free/invalidate its buffer page */ discard_metapage(mp); diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index 41d6045dbeb..5d3bbd10f8d 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -141,10 +141,11 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) } /* Allocate blocks to quota. */ - if (vfs_dq_alloc_block(ip, nxlen)) { + rc = dquot_alloc_block(ip, nxlen); + if (rc) { dbFree(ip, nxaddr, (s64) nxlen); mutex_unlock(&JFS_IP(ip)->commit_mutex); - return -EDQUOT; + return rc; } /* determine the value of the extent flag */ @@ -164,7 +165,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) */ if (rc) { dbFree(ip, nxaddr, nxlen); - vfs_dq_free_block(ip, nxlen); + dquot_free_block(ip, nxlen); mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); } @@ -256,10 +257,11 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) goto exit; /* Allocat blocks to quota. */ - if (vfs_dq_alloc_block(ip, nxlen)) { + rc = dquot_alloc_block(ip, nxlen); + if (rc) { dbFree(ip, nxaddr, (s64) nxlen); mutex_unlock(&JFS_IP(ip)->commit_mutex); - return -EDQUOT; + return rc; } delta = nxlen - xlen; @@ -297,7 +299,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) /* extend the extent */ if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) { dbFree(ip, xaddr + xlen, delta); - vfs_dq_free_block(ip, nxlen); + dquot_free_block(ip, nxlen); goto exit; } } else { @@ -308,7 +310,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) */ if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) { dbFree(ip, nxaddr, nxlen); - vfs_dq_free_block(ip, nxlen); + dquot_free_block(ip, nxlen); goto exit; } } diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index dc0e02159ac..829921b6776 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -116,10 +116,10 @@ struct inode *ialloc(struct inode *parent, umode_t mode) /* * Allocate inode to quota. */ - if (vfs_dq_alloc_inode(inode)) { - rc = -EDQUOT; + dquot_initialize(inode); + rc = dquot_alloc_inode(inode); + if (rc) goto fail_drop; - } inode->i_mode = mode; /* inherit flags from parent */ @@ -162,7 +162,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) return inode; fail_drop: - vfs_dq_drop(inode); + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; fail_unlock: inode->i_nlink = 0; diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 1eff7db34d6..79e2c79661d 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -26,7 +26,7 @@ extern long jfs_ioctl(struct file *, unsigned int, unsigned long); extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); extern struct inode *jfs_iget(struct super_block *, unsigned long); extern int jfs_commit_inode(struct inode *, int); -extern int jfs_write_inode(struct inode*, int); +extern int jfs_write_inode(struct inode *, struct writeback_control *); extern void jfs_delete_inode(struct inode *); extern void jfs_dirty_inode(struct inode *); extern void jfs_truncate(struct inode *); @@ -40,6 +40,7 @@ extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); extern void jfs_set_inode_flags(struct inode *); extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); +extern int jfs_setattr(struct dentry *, struct iattr *); extern const struct address_space_operations jfs_aops; extern const struct inode_operations jfs_dir_inode_operations; diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index d654a645864..6c50871e622 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -585,10 +585,10 @@ int xtInsert(tid_t tid, /* transaction id */ hint = addressXAD(xad) + lengthXAD(xad) - 1; } else hint = 0; - if ((rc = vfs_dq_alloc_block(ip, xlen))) + if ((rc = dquot_alloc_block(ip, xlen))) goto out; if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { - vfs_dq_free_block(ip, xlen); + dquot_free_block(ip, xlen); goto out; } } @@ -617,7 +617,7 @@ int xtInsert(tid_t tid, /* transaction id */ /* undo data extent allocation */ if (*xaddrp == 0) { dbFree(ip, xaddr, (s64) xlen); - vfs_dq_free_block(ip, xlen); + dquot_free_block(ip, xlen); } return rc; } @@ -985,10 +985,9 @@ xtSplitPage(tid_t tid, struct inode *ip, rbn = addressPXD(pxd); /* Allocate blocks to quota. */ - if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { - rc = -EDQUOT; + rc = dquot_alloc_block(ip, lengthPXD(pxd)); + if (rc) goto clean_up; - } quota_allocation += lengthPXD(pxd); @@ -1195,7 +1194,7 @@ xtSplitPage(tid_t tid, struct inode *ip, /* Rollback quota allocation. */ if (quota_allocation) - vfs_dq_free_block(ip, quota_allocation); + dquot_free_block(ip, quota_allocation); return (rc); } @@ -1235,6 +1234,7 @@ xtSplitRoot(tid_t tid, struct pxdlist *pxdlist; struct tlock *tlck; struct xtlock *xtlck; + int rc; sp = &JFS_IP(ip)->i_xtroot; @@ -1252,9 +1252,10 @@ xtSplitRoot(tid_t tid, return -EIO; /* Allocate blocks to quota. */ - if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { + rc = dquot_alloc_block(ip, lengthPXD(pxd)); + if (rc) { release_metapage(rmp); - return -EDQUOT; + return rc; } jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); @@ -3680,7 +3681,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) ip->i_size = newsize; /* update quota allocation to reflect freed blocks */ - vfs_dq_free_block(ip, nfreed); + dquot_free_block(ip, nfreed); /* * free tlock of invalidated pages diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index c79a4270f08..4a3e9f39c21 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -85,6 +85,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, jfs_info("jfs_create: dip:0x%p name:%s", dip, dentry->d_name.name); + dquot_initialize(dip); + /* * search parent directory for entry/freespace * (dtSearch() returns parent directory page pinned) @@ -215,6 +217,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) jfs_info("jfs_mkdir: dip:0x%p name:%s", dip, dentry->d_name.name); + dquot_initialize(dip); + /* link count overflow on parent directory ? */ if (dip->i_nlink == JFS_LINK_MAX) { rc = -EMLINK; @@ -356,7 +360,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); /* Init inode for quota operations. */ - vfs_dq_init(ip); + dquot_initialize(dip); + dquot_initialize(ip); /* directory must be empty to be removed */ if (!dtEmpty(ip)) { @@ -483,7 +488,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) jfs_info("jfs_unlink: dip:0x%p name:%s", dip, dentry->d_name.name); /* Init inode for quota operations. */ - vfs_dq_init(ip); + dquot_initialize(dip); + dquot_initialize(ip); if ((rc = get_UCSname(&dname, dentry))) goto out; @@ -805,6 +811,8 @@ static int jfs_link(struct dentry *old_dentry, if (ip->i_nlink == 0) return -ENOENT; + dquot_initialize(dir); + tid = txBegin(ip->i_sb, 0); mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT); @@ -896,6 +904,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, jfs_info("jfs_symlink: dip:0x%p name:%s", dip, name); + dquot_initialize(dip); + ssize = strlen(name) + 1; /* @@ -1087,6 +1097,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, new_dentry->d_name.name); + dquot_initialize(old_dir); + dquot_initialize(new_dir); + old_ip = old_dentry->d_inode; new_ip = new_dentry->d_inode; @@ -1136,7 +1149,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, } else if (new_ip) { IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL); /* Init inode for quota operations. */ - vfs_dq_init(new_ip); + dquot_initialize(new_ip); } /* @@ -1360,6 +1373,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, jfs_info("jfs_mknod: %s", dentry->d_name.name); + dquot_initialize(dir); + if ((rc = get_UCSname(&dname, dentry))) goto out; @@ -1541,8 +1556,8 @@ const struct inode_operations jfs_dir_inode_operations = { .getxattr = jfs_getxattr, .listxattr = jfs_listxattr, .removexattr = jfs_removexattr, -#ifdef CONFIG_JFS_POSIX_ACL .setattr = jfs_setattr, +#ifdef CONFIG_JFS_POSIX_ACL .check_acl = jfs_check_acl, #endif }; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index d929a822a74..266699deb1c 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -131,6 +131,11 @@ static void jfs_destroy_inode(struct inode *inode) kmem_cache_free(jfs_inode_cachep, ji); } +static void jfs_clear_inode(struct inode *inode) +{ + dquot_drop(inode); +} + static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb); @@ -745,6 +750,7 @@ static const struct super_operations jfs_super_operations = { .dirty_inode = jfs_dirty_inode, .write_inode = jfs_write_inode, .delete_inode = jfs_delete_inode, + .clear_inode = jfs_clear_inode, .put_super = jfs_put_super, .sync_fs = jfs_sync_fs, .freeze_fs = jfs_freeze, diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index fad364548bc..1f594ab2189 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -260,14 +260,14 @@ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size, nblocks = (size + (sb->s_blocksize - 1)) >> sb->s_blocksize_bits; /* Allocate new blocks to quota. */ - if (vfs_dq_alloc_block(ip, nblocks)) { - return -EDQUOT; - } + rc = dquot_alloc_block(ip, nblocks); + if (rc) + return rc; rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno); if (rc) { /*Rollback quota allocation. */ - vfs_dq_free_block(ip, nblocks); + dquot_free_block(ip, nblocks); return rc; } @@ -332,7 +332,7 @@ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size, failed: /* Rollback quota allocation. */ - vfs_dq_free_block(ip, nblocks); + dquot_free_block(ip, nblocks); dbFree(ip, blkno, nblocks); return rc; @@ -538,7 +538,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) if (blocks_needed > current_blocks) { /* Allocate new blocks to quota. */ - if (vfs_dq_alloc_block(inode, blocks_needed)) + rc = dquot_alloc_block(inode, blocks_needed); + if (rc) return -EDQUOT; quota_allocation = blocks_needed; @@ -602,7 +603,7 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) clean_up: /* Rollback quota allocation */ if (quota_allocation) - vfs_dq_free_block(inode, quota_allocation); + dquot_free_block(inode, quota_allocation); return (rc); } @@ -677,7 +678,7 @@ static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf, /* If old blocks exist, they must be removed from quota allocation. */ if (old_blocks) - vfs_dq_free_block(inode, old_blocks); + dquot_free_block(inode, old_blocks); inode->i_ctime = CURRENT_TIME; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 74ea82d7216..756f8c93780 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -17,8 +17,10 @@ #include <linux/init.h> #include <linux/highuid.h> #include <linux/vfs.h> +#include <linux/writeback.h> -static int minix_write_inode(struct inode * inode, int wait); +static int minix_write_inode(struct inode *inode, + struct writeback_control *wbc); static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); static int minix_remount (struct super_block * sb, int * flags, char * data); @@ -552,7 +554,7 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode) return bh; } -static int minix_write_inode(struct inode *inode, int wait) +static int minix_write_inode(struct inode *inode, struct writeback_control *wbc) { int err = 0; struct buffer_head *bh; @@ -563,7 +565,7 @@ static int minix_write_inode(struct inode *inode, int wait) bh = V2_minix_update_inode(inode); if (!bh) return -EIO; - if (wait && buffer_dirty(bh)) { + if (wbc->sync_mode == WB_SYNC_ALL && buffer_dirty(bh)) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { printk("IO error syncing minix inode [%s:%08lx]\n", diff --git a/fs/namei.c b/fs/namei.c index 0741c69b331..3d9d2f965f8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -19,7 +19,6 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/namei.h> -#include <linux/quotaops.h> #include <linux/pagemap.h> #include <linux/fsnotify.h> #include <linux/personality.h> @@ -498,8 +497,6 @@ static int link_path_walk(const char *, struct nameidata *); static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) { - int res = 0; - char *name; if (IS_ERR(link)) goto fail; @@ -510,22 +507,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l path_get(&nd->root); } - res = link_path_walk(link, nd); - if (nd->depth || res || nd->last_type!=LAST_NORM) - return res; - /* - * If it is an iterative symlinks resolution in open_namei() we - * have to copy the last component. And all that crap because of - * bloody create() on broken symlinks. Furrfu... - */ - name = __getname(); - if (unlikely(!name)) { - path_put(&nd->path); - return -ENOMEM; - } - strcpy(name, nd->last.name); - nd->last.name = name; - return 0; + return link_path_walk(link, nd); fail: path_put(&nd->path); return PTR_ERR(link); @@ -547,10 +529,10 @@ static inline void path_to_nameidata(struct path *path, struct nameidata *nd) nd->path.dentry = path->dentry; } -static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd) +static __always_inline int +__do_follow_link(struct path *path, struct nameidata *nd, void **p) { int error; - void *cookie; struct dentry *dentry = path->dentry; touch_atime(path->mnt, dentry); @@ -562,9 +544,9 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata } mntget(path->mnt); nd->last_type = LAST_BIND; - cookie = dentry->d_inode->i_op->follow_link(dentry, nd); - error = PTR_ERR(cookie); - if (!IS_ERR(cookie)) { + *p = dentry->d_inode->i_op->follow_link(dentry, nd); + error = PTR_ERR(*p); + if (!IS_ERR(*p)) { char *s = nd_get_link(nd); error = 0; if (s) @@ -574,8 +556,6 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata if (error) path_put(&nd->path); } - if (dentry->d_inode->i_op->put_link) - dentry->d_inode->i_op->put_link(dentry, nd, cookie); } return error; } @@ -589,6 +569,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata */ static inline int do_follow_link(struct path *path, struct nameidata *nd) { + void *cookie; int err = -ELOOP; if (current->link_count >= MAX_NESTED_LINKS) goto loop; @@ -602,7 +583,9 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd) current->link_count++; current->total_link_count++; nd->depth++; - err = __do_follow_link(path, nd); + err = __do_follow_link(path, nd, &cookie); + if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link) + path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie); path_put(path); current->link_count--; nd->depth--; @@ -1375,22 +1358,6 @@ static inline int may_create(struct inode *dir, struct dentry *child) return inode_permission(dir, MAY_WRITE | MAY_EXEC); } -/* - * O_DIRECTORY translates into forcing a directory lookup. - */ -static inline int lookup_flags(unsigned int f) -{ - unsigned long retval = LOOKUP_FOLLOW; - - if (f & O_NOFOLLOW) - retval &= ~LOOKUP_FOLLOW; - - if (f & O_DIRECTORY) - retval |= LOOKUP_DIRECTORY; - - return retval; -} - /* * p1 and p2 should be directories on the same fs. */ @@ -1448,7 +1415,6 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, error = security_inode_create(dir, dentry, mode); if (error) return error; - vfs_dq_init(dir); error = dir->i_op->create(dir, dentry, mode, nd); if (!error) fsnotify_create(dir, dentry); @@ -1590,129 +1556,132 @@ static int open_will_truncate(int flag, struct inode *inode) return (flag & O_TRUNC); } -/* - * Note that the low bits of the passed in "open_flag" - * are not the same as in the local variable "flag". See - * open_to_namei_flags() for more details. - */ -struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode, int acc_mode) +static struct file *finish_open(struct nameidata *nd, + int open_flag, int acc_mode) { struct file *filp; - struct nameidata nd; - int error; - struct path path; - struct dentry *dir; - int count = 0; int will_truncate; - int flag = open_to_namei_flags(open_flag); - int force_reval = 0; + int error; + will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode); + if (will_truncate) { + error = mnt_want_write(nd->path.mnt); + if (error) + goto exit; + } + error = may_open(&nd->path, acc_mode, open_flag); + if (error) { + if (will_truncate) + mnt_drop_write(nd->path.mnt); + goto exit; + } + filp = nameidata_to_filp(nd); + if (!IS_ERR(filp)) { + error = ima_file_check(filp, acc_mode); + if (error) { + fput(filp); + filp = ERR_PTR(error); + } + } + if (!IS_ERR(filp)) { + if (will_truncate) { + error = handle_truncate(&nd->path); + if (error) { + fput(filp); + filp = ERR_PTR(error); + } + } + } /* - * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only - * check for O_DSYNC if the need any syncing at all we enforce it's - * always set instead of having to deal with possibly weird behaviour - * for malicious applications setting only __O_SYNC. + * It is now safe to drop the mnt write + * because the filp has had a write taken + * on its behalf. */ - if (open_flag & __O_SYNC) - open_flag |= O_DSYNC; - - if (!acc_mode) - acc_mode = MAY_OPEN | ACC_MODE(open_flag); + if (will_truncate) + mnt_drop_write(nd->path.mnt); + return filp; - /* O_TRUNC implies we need access checks for write permissions */ - if (flag & O_TRUNC) - acc_mode |= MAY_WRITE; +exit: + if (!IS_ERR(nd->intent.open.file)) + release_open_intent(nd); + path_put(&nd->path); + return ERR_PTR(error); +} - /* Allow the LSM permission hook to distinguish append - access from general write access. */ - if (flag & O_APPEND) - acc_mode |= MAY_APPEND; +static struct file *do_last(struct nameidata *nd, struct path *path, + int open_flag, int acc_mode, + int mode, const char *pathname, + int *want_dir) +{ + struct dentry *dir = nd->path.dentry; + struct file *filp; + int error = -EISDIR; - /* - * The simplest case - just a plain lookup. - */ - if (!(flag & O_CREAT)) { - filp = get_empty_filp(); - - if (filp == NULL) - return ERR_PTR(-ENFILE); - nd.intent.open.file = filp; - filp->f_flags = open_flag; - nd.intent.open.flags = flag; - nd.intent.open.create_mode = 0; - error = do_path_lookup(dfd, pathname, - lookup_flags(flag)|LOOKUP_OPEN, &nd); - if (IS_ERR(nd.intent.open.file)) { - if (error == 0) { - error = PTR_ERR(nd.intent.open.file); - path_put(&nd.path); + switch (nd->last_type) { + case LAST_DOTDOT: + follow_dotdot(nd); + dir = nd->path.dentry; + if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { + if (!dir->d_op->d_revalidate(dir, nd)) { + error = -ESTALE; + goto exit; } - } else if (error) - release_open_intent(&nd); - if (error) - return ERR_PTR(error); + } + /* fallthrough */ + case LAST_DOT: + case LAST_ROOT: + if (open_flag & O_CREAT) + goto exit; + /* fallthrough */ + case LAST_BIND: + audit_inode(pathname, dir); goto ok; } - /* - * Create - we need to know the parent. - */ -reval: - error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); - if (error) - return ERR_PTR(error); - if (force_reval) - nd.flags |= LOOKUP_REVAL; - error = path_walk(pathname, &nd); - if (error) { - if (nd.root.mnt) - path_put(&nd.root); - return ERR_PTR(error); + /* trailing slashes? */ + if (nd->last.name[nd->last.len]) { + if (open_flag & O_CREAT) + goto exit; + *want_dir = 1; } - if (unlikely(!audit_dummy_context())) - audit_inode(pathname, nd.path.dentry); - /* - * We have the parent and last component. First of all, check - * that we are not asked to creat(2) an obvious directory - that - * will not do. - */ - error = -EISDIR; - if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) - goto exit_parent; + /* just plain open? */ + if (!(open_flag & O_CREAT)) { + error = do_lookup(nd, &nd->last, path); + if (error) + goto exit; + error = -ENOENT; + if (!path->dentry->d_inode) + goto exit_dput; + if (path->dentry->d_inode->i_op->follow_link) + return NULL; + error = -ENOTDIR; + if (*want_dir & !path->dentry->d_inode->i_op->lookup) + goto exit_dput; + path_to_nameidata(path, nd); + audit_inode(pathname, nd->path.dentry); + goto ok; + } - error = -ENFILE; - filp = get_empty_filp(); - if (filp == NULL) - goto exit_parent; - nd.intent.open.file = filp; - filp->f_flags = open_flag; - nd.intent.open.flags = flag; - nd.intent.open.create_mode = mode; - dir = nd.path.dentry; - nd.flags &= ~LOOKUP_PARENT; - nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN; - if (flag & O_EXCL) - nd.flags |= LOOKUP_EXCL; + /* OK, it's O_CREAT */ mutex_lock(&dir->d_inode->i_mutex); - path.dentry = lookup_hash(&nd); - path.mnt = nd.path.mnt; -do_last: - error = PTR_ERR(path.dentry); - if (IS_ERR(path.dentry)) { + path->dentry = lookup_hash(nd); + path->mnt = nd->path.mnt; + + error = PTR_ERR(path->dentry); + if (IS_ERR(path->dentry)) { mutex_unlock(&dir->d_inode->i_mutex); goto exit; } - if (IS_ERR(nd.intent.open.file)) { - error = PTR_ERR(nd.intent.open.file); + if (IS_ERR(nd->intent.open.file)) { + error = PTR_ERR(nd->intent.open.file); goto exit_mutex_unlock; } /* Negative dentry, just create the file */ - if (!path.dentry->d_inode) { + if (!path->dentry->d_inode) { /* * This write is needed to ensure that a * ro->rw transition does not occur between @@ -1720,18 +1689,16 @@ do_last: * a permanent write count is taken through * the 'struct file' in nameidata_to_filp(). */ - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(nd->path.mnt); if (error) goto exit_mutex_unlock; - error = __open_namei_create(&nd, &path, open_flag, mode); + error = __open_namei_create(nd, path, open_flag, mode); if (error) { - mnt_drop_write(nd.path.mnt); + mnt_drop_write(nd->path.mnt); goto exit; } - filp = nameidata_to_filp(&nd); - mnt_drop_write(nd.path.mnt); - if (nd.root.mnt) - path_put(&nd.root); + filp = nameidata_to_filp(nd); + mnt_drop_write(nd->path.mnt); if (!IS_ERR(filp)) { error = ima_file_check(filp, acc_mode); if (error) { @@ -1746,150 +1713,181 @@ do_last: * It already exists. */ mutex_unlock(&dir->d_inode->i_mutex); - audit_inode(pathname, path.dentry); + audit_inode(pathname, path->dentry); error = -EEXIST; - if (flag & O_EXCL) + if (open_flag & O_EXCL) goto exit_dput; - if (__follow_mount(&path)) { + if (__follow_mount(path)) { error = -ELOOP; - if (flag & O_NOFOLLOW) + if (open_flag & O_NOFOLLOW) goto exit_dput; } error = -ENOENT; - if (!path.dentry->d_inode) + if (!path->dentry->d_inode) goto exit_dput; - if (path.dentry->d_inode->i_op->follow_link) - goto do_link; - path_to_nameidata(&path, &nd); + if (path->dentry->d_inode->i_op->follow_link) + return NULL; + + path_to_nameidata(path, nd); error = -EISDIR; - if (S_ISDIR(path.dentry->d_inode->i_mode)) + if (S_ISDIR(path->dentry->d_inode->i_mode)) goto exit; ok: + filp = finish_open(nd, open_flag, acc_mode); + return filp; + +exit_mutex_unlock: + mutex_unlock(&dir->d_inode->i_mutex); +exit_dput: + path_put_conditional(path, nd); +exit: + if (!IS_ERR(nd->intent.open.file)) + release_open_intent(nd); + path_put(&nd->path); + return ERR_PTR(error); +} + +/* + * Note that the low bits of the passed in "open_flag" + * are not the same as in the local variable "flag". See + * open_to_namei_flags() for more details. + */ +struct file *do_filp_open(int dfd, const char *pathname, + int open_flag, int mode, int acc_mode) +{ + struct file *filp; + struct nameidata nd; + int error; + struct path path; + int count = 0; + int flag = open_to_namei_flags(open_flag); + int force_reval = 0; + int want_dir = open_flag & O_DIRECTORY; + + if (!(open_flag & O_CREAT)) + mode = 0; + /* - * Consider: - * 1. may_open() truncates a file - * 2. a rw->ro mount transition occurs - * 3. nameidata_to_filp() fails due to - * the ro mount. - * That would be inconsistent, and should - * be avoided. Taking this mnt write here - * ensures that (2) can not occur. + * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only + * check for O_DSYNC if the need any syncing at all we enforce it's + * always set instead of having to deal with possibly weird behaviour + * for malicious applications setting only __O_SYNC. */ - will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode); - if (will_truncate) { - error = mnt_want_write(nd.path.mnt); - if (error) - goto exit; - } - error = may_open(&nd.path, acc_mode, open_flag); + if (open_flag & __O_SYNC) + open_flag |= O_DSYNC; + + if (!acc_mode) + acc_mode = MAY_OPEN | ACC_MODE(open_flag); + + /* O_TRUNC implies we need access checks for write permissions */ + if (open_flag & O_TRUNC) + acc_mode |= MAY_WRITE; + + /* Allow the LSM permission hook to distinguish append + access from general write access. */ + if (open_flag & O_APPEND) + acc_mode |= MAY_APPEND; + + /* find the parent */ +reval: + error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); + if (error) + return ERR_PTR(error); + if (force_reval) + nd.flags |= LOOKUP_REVAL; + + current->total_link_count = 0; + error = link_path_walk(pathname, &nd); if (error) { - if (will_truncate) - mnt_drop_write(nd.path.mnt); - goto exit; - } - filp = nameidata_to_filp(&nd); - if (!IS_ERR(filp)) { - error = ima_file_check(filp, acc_mode); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } + filp = ERR_PTR(error); + goto out; } - if (!IS_ERR(filp)) { - if (acc_mode & MAY_WRITE) - vfs_dq_init(nd.path.dentry->d_inode); + if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT)) + audit_inode(pathname, nd.path.dentry); - if (will_truncate) { - error = handle_truncate(&nd.path); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } - } - } /* - * It is now safe to drop the mnt write - * because the filp has had a write taken - * on its behalf. + * We have the parent and last component. */ - if (will_truncate) - mnt_drop_write(nd.path.mnt); + + error = -ENFILE; + filp = get_empty_filp(); + if (filp == NULL) + goto exit_parent; + nd.intent.open.file = filp; + filp->f_flags = open_flag; + nd.intent.open.flags = flag; + nd.intent.open.create_mode = mode; + nd.flags &= ~LOOKUP_PARENT; + nd.flags |= LOOKUP_OPEN; + if (open_flag & O_CREAT) { + nd.flags |= LOOKUP_CREATE; + if (open_flag & O_EXCL) + nd.flags |= LOOKUP_EXCL; + } + filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir); + while (unlikely(!filp)) { /* trailing symlink */ + struct path holder; + struct inode *inode = path.dentry->d_inode; + void *cookie; + error = -ELOOP; + /* S_ISDIR part is a temporary automount kludge */ + if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode)) + goto exit_dput; + if (count++ == 32) + goto exit_dput; + /* + * This is subtle. Instead of calling do_follow_link() we do + * the thing by hands. The reason is that this way we have zero + * link_count and path_walk() (called from ->follow_link) + * honoring LOOKUP_PARENT. After that we have the parent and + * last component, i.e. we are in the same situation as after + * the first path_walk(). Well, almost - if the last component + * is normal we get its copy stored in nd->last.name and we will + * have to putname() it when we are done. Procfs-like symlinks + * just set LAST_BIND. + */ + nd.flags |= LOOKUP_PARENT; + error = security_inode_follow_link(path.dentry, &nd); + if (error) + goto exit_dput; + error = __do_follow_link(&path, &nd, &cookie); + if (unlikely(error)) { + /* nd.path had been dropped */ + if (!IS_ERR(cookie) && inode->i_op->put_link) + inode->i_op->put_link(path.dentry, &nd, cookie); + path_put(&path); + release_open_intent(&nd); + filp = ERR_PTR(error); + goto out; + } + holder = path; + nd.flags &= ~LOOKUP_PARENT; + filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir); + if (inode->i_op->put_link) + inode->i_op->put_link(holder.dentry, &nd, cookie); + path_put(&holder); + } +out: if (nd.root.mnt) path_put(&nd.root); + if (filp == ERR_PTR(-ESTALE) && !force_reval) { + force_reval = 1; + goto reval; + } return filp; -exit_mutex_unlock: - mutex_unlock(&dir->d_inode->i_mutex); exit_dput: path_put_conditional(&path, &nd); -exit: if (!IS_ERR(nd.intent.open.file)) release_open_intent(&nd); exit_parent: - if (nd.root.mnt) - path_put(&nd.root); path_put(&nd.path); - return ERR_PTR(error); - -do_link: - error = -ELOOP; - if (flag & O_NOFOLLOW) - goto exit_dput; - /* - * This is subtle. Instead of calling do_follow_link() we do the - * thing by hands. The reason is that this way we have zero link_count - * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. - * After that we have the parent and last component, i.e. - * we are in the same situation as after the first path_walk(). - * Well, almost - if the last component is normal we get its copy - * stored in nd->last.name and we will have to putname() it when we - * are done. Procfs-like symlinks just set LAST_BIND. - */ - nd.flags |= LOOKUP_PARENT; - error = security_inode_follow_link(path.dentry, &nd); - if (error) - goto exit_dput; - error = __do_follow_link(&path, &nd); - path_put(&path); - if (error) { - /* Does someone understand code flow here? Or it is only - * me so stupid? Anathema to whoever designed this non-sense - * with "intent.open". - */ - release_open_intent(&nd); - if (nd.root.mnt) - path_put(&nd.root); - if (error == -ESTALE && !force_reval) { - force_reval = 1; - goto reval; - } - return ERR_PTR(error); - } - nd.flags &= ~LOOKUP_PARENT; - if (nd.last_type == LAST_BIND) - goto ok; - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit; - if (nd.last.name[nd.last.len]) { - __putname(nd.last.name); - goto exit; - } - error = -ELOOP; - if (count++==32) { - __putname(nd.last.name); - goto exit; - } - dir = nd.path.dentry; - mutex_lock(&dir->d_inode->i_mutex); - path.dentry = lookup_hash(&nd); - path.mnt = nd.path.mnt; - __putname(nd.last.name); - goto do_last; + filp = ERR_PTR(error); + goto out; } /** @@ -1983,7 +1981,6 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) if (error) return error; - vfs_dq_init(dir); error = dir->i_op->mknod(dir, dentry, mode, dev); if (!error) fsnotify_create(dir, dentry); @@ -2082,7 +2079,6 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (error) return error; - vfs_dq_init(dir); error = dir->i_op->mkdir(dir, dentry, mode); if (!error) fsnotify_mkdir(dir, dentry); @@ -2168,8 +2164,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) if (!dir->i_op->rmdir) return -EPERM; - vfs_dq_init(dir); - mutex_lock(&dentry->d_inode->i_mutex); dentry_unhash(dentry); if (d_mountpoint(dentry)) @@ -2255,8 +2249,6 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) if (!dir->i_op->unlink) return -EPERM; - vfs_dq_init(dir); - mutex_lock(&dentry->d_inode->i_mutex); if (d_mountpoint(dentry)) error = -EBUSY; @@ -2369,7 +2361,6 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) if (error) return error; - vfs_dq_init(dir); error = dir->i_op->symlink(dir, dentry, oldname); if (!error) fsnotify_create(dir, dentry); @@ -2453,7 +2444,6 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de return error; mutex_lock(&inode->i_mutex); - vfs_dq_init(dir); error = dir->i_op->link(old_dentry, dir, new_dentry); mutex_unlock(&inode->i_mutex); if (!error) @@ -2654,9 +2644,6 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!old_dir->i_op->rename) return -EPERM; - vfs_dq_init(old_dir); - vfs_dq_init(new_dir); - old_name = fsnotify_oldname_init(old_dentry->d_name.name); if (is_dir) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7570573bdb3..7f9ecc46f3f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -97,16 +97,12 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -int nfs_write_inode(struct inode *inode, int sync) +int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; - if (sync) { - ret = filemap_fdatawait(inode->i_mapping); - if (ret == 0) - ret = nfs_commit_inode(inode, FLUSH_SYNC); - } else - ret = nfs_commit_inode(inode, 0); + ret = nfs_commit_inode(inode, + wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); if (ret >= 0) return 0; __mark_inode_dirty(inode, I_DIRTY_DATASYNC); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 29e464d23b3..11f82f03c5d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -211,7 +211,7 @@ extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); -extern int nfs_write_inode(struct inode *,int); +extern int nfs_write_inode(struct inode *, struct writeback_control *); extern void nfs_clear_inode(struct inode *); #ifdef CONFIG_NFS_V4 extern void nfs4_clear_inode(struct inode *); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 15dc2deaac5..8eca17df4f6 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -20,7 +20,6 @@ #include <linux/fcntl.h> #include <linux/namei.h> #include <linux/delay.h> -#include <linux/quotaops.h> #include <linux/fsnotify.h> #include <linux/posix_acl_xattr.h> #include <linux/xattr.h> @@ -377,7 +376,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, put_write_access(inode); goto out_nfserr; } - vfs_dq_init(inode); } /* sanitize the mode change */ @@ -745,8 +743,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, flags = O_RDWR|O_LARGEFILE; else flags = O_WRONLY|O_LARGEFILE; - - vfs_dq_init(inode); } *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), flags, current_cred()); diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 5a9e34475e3..9173e82a45d 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1545,7 +1545,7 @@ static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry, write_inode_now(bmp_vi, !datasync); iput(bmp_vi); } - ret = ntfs_write_inode(vi, 1); + ret = __ntfs_write_inode(vi, 1); write_inode_now(vi, !datasync); err = sync_blockdev(vi->i_sb->s_bdev); if (unlikely(err && !ret)) diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 43179ddd336..b681c71d706 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2182,7 +2182,7 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); BUG_ON(S_ISDIR(vi->i_mode)); if (!datasync || !NInoNonResident(NTFS_I(vi))) - ret = ntfs_write_inode(vi, 1); + ret = __ntfs_write_inode(vi, 1); write_inode_now(vi, !datasync); /* * NOTE: If we were to use mapping->private_list (see ext2 and diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index dc2505abb6d..4b57fb1eac2 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2957,7 +2957,7 @@ out: * * Return 0 on success and -errno on error. */ -int ntfs_write_inode(struct inode *vi, int sync) +int __ntfs_write_inode(struct inode *vi, int sync) { sle64 nt; ntfs_inode *ni = NTFS_I(vi); diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 117eaf8032a..9a113544605 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -307,12 +307,12 @@ extern void ntfs_truncate_vfs(struct inode *vi); extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr); -extern int ntfs_write_inode(struct inode *vi, int sync); +extern int __ntfs_write_inode(struct inode *vi, int sync); static inline void ntfs_commit_inode(struct inode *vi) { if (!is_bad_inode(vi)) - ntfs_write_inode(vi, 1); + __ntfs_write_inode(vi, 1); return; } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 80b04770e8e..1cf39dfaee7 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -39,6 +39,7 @@ #include "dir.h" #include "debug.h" #include "index.h" +#include "inode.h" #include "aops.h" #include "layout.h" #include "malloc.h" @@ -2662,6 +2663,13 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs) return 0; } +#ifdef NTFS_RW +static int ntfs_write_inode(struct inode *vi, struct writeback_control *wbc) +{ + return __ntfs_write_inode(vi, wbc->sync_mode == WB_SYNC_ALL); +} +#endif + /** * The complete super operations. */ diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 2bbe1ecc08c..9f8bd913c51 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5713,7 +5713,7 @@ int ocfs2_remove_btree_range(struct inode *inode, goto out; } - vfs_dq_free_space_nodirty(inode, + dquot_free_space_nodirty(inode, ocfs2_clusters_to_bytes(inode->i_sb, len)); ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc); @@ -6936,7 +6936,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, goto bail; } - vfs_dq_free_space_nodirty(inode, + dquot_free_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, clusters_to_del)); spin_lock(&OCFS2_I(inode)->ip_lock); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - @@ -7301,11 +7301,10 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, unsigned int page_end; u64 phys; - if (vfs_dq_alloc_space_nodirty(inode, - ocfs2_clusters_to_bytes(osb->sb, 1))) { - ret = -EDQUOT; + ret = dquot_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, 1)); + if (ret) goto out_commit; - } did_quota = 1; ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, @@ -7381,7 +7380,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, out_commit: if (ret < 0 && did_quota) - vfs_dq_free_space_nodirty(inode, + dquot_free_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, 1)); ocfs2_commit_trans(osb, handle); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 4c2a6d282c4..21441ddb550 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1764,10 +1764,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, wc->w_handle = handle; - if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode, - ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) { - ret = -EDQUOT; - goto out_commit; + if (clusters_to_alloc) { + ret = dquot_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc)); + if (ret) + goto out_commit; } /* * We don't want this to fail in ocfs2_write_end(), so do it @@ -1810,7 +1811,7 @@ success: return 0; out_quota: if (clusters_to_alloc) - vfs_dq_free_space(inode, + dquot_free_space(inode, ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc)); out_commit: ocfs2_commit_trans(osb, handle); diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 765d66c7098..efd77d071c8 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2964,12 +2964,10 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, goto out; } - if (vfs_dq_alloc_space_nodirty(dir, - ocfs2_clusters_to_bytes(osb->sb, - alloc + dx_alloc))) { - ret = -EDQUOT; + ret = dquot_alloc_space_nodirty(dir, + ocfs2_clusters_to_bytes(osb->sb, alloc + dx_alloc)); + if (ret) goto out_commit; - } did_quota = 1; if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { @@ -3178,7 +3176,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, out_commit: if (ret < 0 && did_quota) - vfs_dq_free_space_nodirty(dir, bytes_allocated); + dquot_free_space_nodirty(dir, bytes_allocated); ocfs2_commit_trans(osb, handle); @@ -3221,11 +3219,10 @@ static int ocfs2_do_extend_dir(struct super_block *sb, if (extend) { u32 offset = OCFS2_I(dir)->ip_clusters; - if (vfs_dq_alloc_space_nodirty(dir, - ocfs2_clusters_to_bytes(sb, 1))) { - status = -EDQUOT; + status = dquot_alloc_space_nodirty(dir, + ocfs2_clusters_to_bytes(sb, 1)); + if (status) goto bail; - } did_quota = 1; status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset, @@ -3254,7 +3251,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb, status = 0; bail: if (did_quota && status < 0) - vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1)); + dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1)); mlog_exit(status); return status; } @@ -3889,11 +3886,10 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, goto out; } - if (vfs_dq_alloc_space_nodirty(dir, - ocfs2_clusters_to_bytes(dir->i_sb, 1))) { - ret = -EDQUOT; + ret = dquot_alloc_space_nodirty(dir, + ocfs2_clusters_to_bytes(dir->i_sb, 1)); + if (ret) goto out_commit; - } did_quota = 1; ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh, @@ -3983,7 +3979,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, out_commit: if (ret < 0 && did_quota) - vfs_dq_free_space_nodirty(dir, + dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(dir->i_sb, 1)); ocfs2_commit_trans(osb, handle); @@ -4165,11 +4161,10 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir, goto out; } - if (vfs_dq_alloc_space_nodirty(dir, - ocfs2_clusters_to_bytes(osb->sb, 1))) { - ret = -EDQUOT; + ret = dquot_alloc_space_nodirty(dir, + ocfs2_clusters_to_bytes(osb->sb, 1)); + if (ret) goto out_commit; - } did_quota = 1; /* @@ -4229,7 +4224,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir, out_commit: if (ret < 0 && did_quota) - vfs_dq_free_space_nodirty(dir, + dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(dir->i_sb, 1)); ocfs2_commit_trans(osb, handle); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 5b52547d629..17947dc8341 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -107,6 +107,9 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name); + if (file->f_mode & FMODE_WRITE) + dquot_initialize(inode); + spin_lock(&oi->ip_lock); /* Check that the inode hasn't been wiped from disk by another @@ -629,11 +632,10 @@ restart_all: } restarted_transaction: - if (vfs_dq_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, - clusters_to_add))) { - status = -EDQUOT; + status = dquot_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); + if (status) goto leave; - } did_quota = 1; /* reserve a write to the file entry early on - that we if we @@ -674,7 +676,7 @@ restarted_transaction: clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); spin_unlock(&OCFS2_I(inode)->ip_lock); /* Release unused quota reservation */ - vfs_dq_free_space(inode, + dquot_free_space(inode, ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); did_quota = 0; @@ -710,7 +712,7 @@ restarted_transaction: leave: if (status < 0 && did_quota) - vfs_dq_free_space(inode, + dquot_free_space(inode, ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); if (handle) { ocfs2_commit_trans(osb, handle); @@ -978,6 +980,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; if (size_change) { + dquot_initialize(inode); + status = ocfs2_rw_lock(inode, 1); if (status < 0) { mlog_errno(status); @@ -1020,7 +1024,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) /* * Gather pointers to quota structures so that allocation / * freeing of quota structures happens here and not inside - * vfs_dq_transfer() where we have problems with lock ordering + * dquot_transfer() where we have problems with lock ordering */ if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid && OCFS2_HAS_RO_COMPAT_FEATURE(sb, @@ -1053,7 +1057,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) mlog_errno(status); goto bail_unlock; } - status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; + status = dquot_transfer(inode, attr); if (status < 0) goto bail_commit; } else { diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 88459bdd1ff..278a223aae1 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -665,7 +665,7 @@ static int ocfs2_remove_inode(struct inode *inode, } ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh); - vfs_dq_free_inode(inode); + dquot_free_inode(inode); status = ocfs2_free_dinode(handle, inode_alloc_inode, inode_alloc_bh, di); @@ -971,6 +971,8 @@ void ocfs2_delete_inode(struct inode *inode) goto bail; } + dquot_initialize(inode); + if (!ocfs2_inode_is_valid_to_delete(inode)) { /* It's probably not necessary to truncate_inode_pages * here but we do it for safety anyway (it will most @@ -1087,6 +1089,8 @@ void ocfs2_clear_inode(struct inode *inode) mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, "Inode=%lu\n", inode->i_ino); + dquot_drop(inode); + /* To preven remote deletes we hold open lock before, now it * is time to unlock PR and EX open locks. */ ocfs2_open_unlock(inode); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 50fb26a6a5f..d9cd4e373a5 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -212,7 +212,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode) } else inode->i_gid = current_fsgid(); inode->i_mode = mode; - vfs_dq_init(inode); + dquot_initialize(inode); return inode; } @@ -244,6 +244,8 @@ static int ocfs2_mknod(struct inode *dir, (unsigned long)dev, dentry->d_name.len, dentry->d_name.name); + dquot_initialize(dir); + /* get our super block */ osb = OCFS2_SB(dir->i_sb); @@ -348,13 +350,9 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } - /* We don't use standard VFS wrapper because we don't want vfs_dq_init - * to be called. */ - if (sb_any_quota_active(osb->sb) && - osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { - status = -EDQUOT; + status = dquot_alloc_inode(inode); + if (status) goto leave; - } did_quota_inode = 1; mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, @@ -431,7 +429,7 @@ static int ocfs2_mknod(struct inode *dir, status = 0; leave: if (status < 0 && did_quota_inode) - vfs_dq_free_inode(inode); + dquot_free_inode(inode); if (handle) ocfs2_commit_trans(osb, handle); @@ -636,6 +634,8 @@ static int ocfs2_link(struct dentry *old_dentry, if (S_ISDIR(inode->i_mode)) return -EPERM; + dquot_initialize(dir); + err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT); if (err < 0) { if (err != -ENOENT) @@ -791,6 +791,8 @@ static int ocfs2_unlink(struct inode *dir, mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, dentry->d_name.len, dentry->d_name.name); + dquot_initialize(dir); + BUG_ON(dentry->d_parent->d_inode != dir); mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); @@ -1051,6 +1053,9 @@ static int ocfs2_rename(struct inode *old_dir, old_dentry->d_name.len, old_dentry->d_name.name, new_dentry->d_name.len, new_dentry->d_name.name); + dquot_initialize(old_dir); + dquot_initialize(new_dir); + osb = OCFS2_SB(old_dir->i_sb); if (new_inode) { @@ -1599,6 +1604,8 @@ static int ocfs2_symlink(struct inode *dir, mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, dentry, symname, dentry->d_name.len, dentry->d_name.name); + dquot_initialize(dir); + sb = dir->i_sb; osb = OCFS2_SB(sb); @@ -1688,13 +1695,9 @@ static int ocfs2_symlink(struct inode *dir, goto bail; } - /* We don't use standard VFS wrapper because we don't want vfs_dq_init - * to be called. */ - if (sb_any_quota_active(osb->sb) && - osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { - status = -EDQUOT; + status = dquot_alloc_inode(inode); + if (status) goto bail; - } did_quota_inode = 1; mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, @@ -1716,11 +1719,10 @@ static int ocfs2_symlink(struct inode *dir, u32 offset = 0; inode->i_op = &ocfs2_symlink_inode_operations; - if (vfs_dq_alloc_space_nodirty(inode, - ocfs2_clusters_to_bytes(osb->sb, 1))) { - status = -EDQUOT; + status = dquot_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, 1)); + if (status) goto bail; - } did_quota = 1; status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, new_fe_bh, @@ -1788,10 +1790,10 @@ static int ocfs2_symlink(struct inode *dir, d_instantiate(dentry, inode); bail: if (status < 0 && did_quota) - vfs_dq_free_space_nodirty(inode, + dquot_free_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, 1)); if (status < 0 && did_quota_inode) - vfs_dq_free_inode(inode); + dquot_free_inode(inode); if (handle) ocfs2_commit_trans(osb, handle); @@ -2099,13 +2101,9 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, goto leave; } - /* We don't use standard VFS wrapper because we don't want vfs_dq_init - * to be called. */ - if (sb_any_quota_active(osb->sb) && - osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { - status = -EDQUOT; + status = dquot_alloc_inode(inode); + if (status) goto leave; - } did_quota_inode = 1; inode->i_nlink = 0; @@ -2140,7 +2138,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, insert_inode_hash(inode); leave: if (status < 0 && did_quota_inode) - vfs_dq_free_inode(inode); + dquot_free_inode(inode); if (handle) ocfs2_commit_trans(osb, handle); diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index b437dc0c4ca..355f41d1d52 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -851,13 +851,6 @@ static void ocfs2_destroy_dquot(struct dquot *dquot) } const struct dquot_operations ocfs2_quota_operations = { - .initialize = dquot_initialize, - .drop = dquot_drop, - .alloc_space = dquot_alloc_space, - .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, - .free_inode = dquot_free_inode, - .transfer = dquot_transfer, .write_dquot = ocfs2_write_dquot, .acquire_dquot = ocfs2_acquire_dquot, .release_dquot = ocfs2_release_dquot, diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index fb6aa7acf54..9e96921dffd 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4390,7 +4390,7 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir, } mutex_lock(&inode->i_mutex); - vfs_dq_init(dir); + dquot_initialize(dir); error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve); mutex_unlock(&inode->i_mutex); if (!error) diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index f3b7c1541f3..75d9b5ba1d4 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -11,6 +11,7 @@ #include <linux/parser.h> #include <linux/buffer_head.h> #include <linux/vmalloc.h> +#include <linux/writeback.h> #include <linux/crc-itu-t.h> #include "omfs.h" @@ -89,7 +90,7 @@ static void omfs_update_checksums(struct omfs_inode *oi) oi->i_head.h_check_xor = xor; } -static int omfs_write_inode(struct inode *inode, int wait) +static int __omfs_write_inode(struct inode *inode, int wait) { struct omfs_inode *oi; struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); @@ -162,9 +163,14 @@ out: return ret; } +static int omfs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __omfs_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int omfs_sync_inode(struct inode *inode) { - return omfs_write_inode(inode, 1); + return __omfs_write_inode(inode, 1); } /* diff --git a/fs/open.c b/fs/open.c index e0b2d88b038..e17f54454b5 100644 --- a/fs/open.c +++ b/fs/open.c @@ -8,7 +8,6 @@ #include <linux/mm.h> #include <linux/file.h> #include <linux/fdtable.h> -#include <linux/quotaops.h> #include <linux/fsnotify.h> #include <linux/module.h> #include <linux/slab.h> @@ -278,10 +277,8 @@ static long do_sys_truncate(const char __user *pathname, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) error = security_path_truncate(&path, length, 0); - if (!error) { - vfs_dq_init(inode); + if (!error) error = do_truncate(path.dentry, length, 0, NULL); - } put_write_and_out: put_write_access(inode); diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index efc02ebb8c7..dad7fb247dd 100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig @@ -59,3 +59,8 @@ config QUOTACTL bool depends on XFS_QUOTA || QUOTA default y + +config QUOTACTL_COMPAT + bool + depends on QUOTACTL && COMPAT_FOR_U64_ALIGNMENT + default y diff --git a/fs/quota/Makefile b/fs/quota/Makefile index 68d4f6dc057..5f9e9e276af 100644 --- a/fs/quota/Makefile +++ b/fs/quota/Makefile @@ -3,3 +3,5 @@ obj-$(CONFIG_QFMT_V1) += quota_v1.o obj-$(CONFIG_QFMT_V2) += quota_v2.o obj-$(CONFIG_QUOTA_TREE) += quota_tree.o obj-$(CONFIG_QUOTACTL) += quota.o +obj-$(CONFIG_QUOTACTL_COMPAT) += compat.o +obj-$(CONFIG_QUOTA_NETLINK_INTERFACE) += netlink.o diff --git a/fs/quota/compat.c b/fs/quota/compat.c new file mode 100644 index 00000000000..fb1892fe3e5 --- /dev/null +++ b/fs/quota/compat.c @@ -0,0 +1,118 @@ + +#include <linux/syscalls.h> +#include <linux/compat.h> +#include <linux/quotaops.h> + +/* + * This code works only for 32 bit quota tools over 64 bit OS (x86_64, ia64) + * and is necessary due to alignment problems. + */ +struct compat_if_dqblk { + compat_u64 dqb_bhardlimit; + compat_u64 dqb_bsoftlimit; + compat_u64 dqb_curspace; + compat_u64 dqb_ihardlimit; + compat_u64 dqb_isoftlimit; + compat_u64 dqb_curinodes; + compat_u64 dqb_btime; + compat_u64 dqb_itime; + compat_uint_t dqb_valid; +}; + +/* XFS structures */ +struct compat_fs_qfilestat { + compat_u64 dqb_bhardlimit; + compat_u64 qfs_nblks; + compat_uint_t qfs_nextents; +}; + +struct compat_fs_quota_stat { + __s8 qs_version; + __u16 qs_flags; + __s8 qs_pad; + struct compat_fs_qfilestat qs_uquota; + struct compat_fs_qfilestat qs_gquota; + compat_uint_t qs_incoredqs; + compat_int_t qs_btimelimit; + compat_int_t qs_itimelimit; + compat_int_t qs_rtbtimelimit; + __u16 qs_bwarnlimit; + __u16 qs_iwarnlimit; +}; + +asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, + qid_t id, void __user *addr) +{ + unsigned int cmds; + struct if_dqblk __user *dqblk; + struct compat_if_dqblk __user *compat_dqblk; + struct fs_quota_stat __user *fsqstat; + struct compat_fs_quota_stat __user *compat_fsqstat; + compat_uint_t data; + u16 xdata; + long ret; + + cmds = cmd >> SUBCMDSHIFT; + + switch (cmds) { + case Q_GETQUOTA: + dqblk = compat_alloc_user_space(sizeof(struct if_dqblk)); + compat_dqblk = addr; + ret = sys_quotactl(cmd, special, id, dqblk); + if (ret) + break; + if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) || + get_user(data, &dqblk->dqb_valid) || + put_user(data, &compat_dqblk->dqb_valid)) + ret = -EFAULT; + break; + case Q_SETQUOTA: + dqblk = compat_alloc_user_space(sizeof(struct if_dqblk)); + compat_dqblk = addr; + ret = -EFAULT; + if (copy_in_user(dqblk, compat_dqblk, sizeof(*compat_dqblk)) || + get_user(data, &compat_dqblk->dqb_valid) || + put_user(data, &dqblk->dqb_valid)) + break; + ret = sys_quotactl(cmd, special, id, dqblk); + break; + case Q_XGETQSTAT: + fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat)); + compat_fsqstat = addr; + ret = sys_quotactl(cmd, special, id, fsqstat); + if (ret) + break; + ret = -EFAULT; + /* Copying qs_version, qs_flags, qs_pad */ + if (copy_in_user(compat_fsqstat, fsqstat, + offsetof(struct compat_fs_quota_stat, qs_uquota))) + break; + /* Copying qs_uquota */ + if (copy_in_user(&compat_fsqstat->qs_uquota, + &fsqstat->qs_uquota, + sizeof(compat_fsqstat->qs_uquota)) || + get_user(data, &fsqstat->qs_uquota.qfs_nextents) || + put_user(data, &compat_fsqstat->qs_uquota.qfs_nextents)) + break; + /* Copying qs_gquota */ + if (copy_in_user(&compat_fsqstat->qs_gquota, + &fsqstat->qs_gquota, + sizeof(compat_fsqstat->qs_gquota)) || + get_user(data, &fsqstat->qs_gquota.qfs_nextents) || + put_user(data, &compat_fsqstat->qs_gquota.qfs_nextents)) + break; + /* Copying the rest */ + if (copy_in_user(&compat_fsqstat->qs_incoredqs, + &fsqstat->qs_incoredqs, + sizeof(struct compat_fs_quota_stat) - + offsetof(struct compat_fs_quota_stat, qs_incoredqs)) || + get_user(xdata, &fsqstat->qs_iwarnlimit) || + put_user(xdata, &compat_fsqstat->qs_iwarnlimit)) + break; + ret = 0; + break; + default: + ret = sys_quotactl(cmd, special, id, addr); + } + return ret; +} diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 3fc62b097be..e0b870f4749 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -100,9 +100,13 @@ * * Any operation working on dquots via inode pointers must hold dqptr_sem. If * operation is just reading pointers from inode (or not using them at all) the - * read lock is enough. If pointers are altered function must hold write lock - * (these locking rules also apply for S_NOQUOTA flag in the inode - note that - * for altering the flag i_mutex is also needed). + * read lock is enough. If pointers are altered function must hold write lock. + * Special care needs to be taken about S_NOQUOTA inode flag (marking that + * inode is a quota file). Functions adding pointers from inode to dquots have + * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they + * have to do all pointer modifications before dropping dqptr_sem. This makes + * sure they cannot race with quotaon which first sets S_NOQUOTA flag and + * then drops all pointers to dquots from an inode. * * Each dquot has its dq_lock mutex. Locked dquots might not be referenced * from inodes (dquot_alloc_space() and such don't check the dq_lock). @@ -225,6 +229,9 @@ static struct hlist_head *dquot_hash; struct dqstats dqstats; EXPORT_SYMBOL(dqstats); +static qsize_t inode_get_rsv_space(struct inode *inode); +static void __dquot_initialize(struct inode *inode, int type); + static inline unsigned int hashfn(const struct super_block *sb, unsigned int id, int type) { @@ -564,7 +571,7 @@ out: } EXPORT_SYMBOL(dquot_scan_active); -int vfs_quota_sync(struct super_block *sb, int type) +int vfs_quota_sync(struct super_block *sb, int type, int wait) { struct list_head *dirty; struct dquot *dquot; @@ -609,6 +616,33 @@ int vfs_quota_sync(struct super_block *sb, int type) spin_unlock(&dq_list_lock); mutex_unlock(&dqopt->dqonoff_mutex); + if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)) + return 0; + + /* This is not very clever (and fast) but currently I don't know about + * any other simple way of getting quota data to disk and we must get + * them there for userspace to be visible... */ + if (sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, 1); + sync_blockdev(sb->s_bdev); + + /* + * Now when everything is written we can discard the pagecache so + * that userspace sees the changes. + */ + mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (type != -1 && cnt != type) + continue; + if (!sb_has_quota_active(sb, cnt)) + continue; + mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, + I_MUTEX_QUOTA); + truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0); + mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex); + } + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + return 0; } EXPORT_SYMBOL(vfs_quota_sync); @@ -840,11 +874,14 @@ static int dqinit_needed(struct inode *inode, int type) static void add_dquot_ref(struct super_block *sb, int type) { struct inode *inode, *old_inode = NULL; + int reserved = 0; spin_lock(&inode_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) continue; + if (unlikely(inode_get_rsv_space(inode) > 0)) + reserved = 1; if (!atomic_read(&inode->i_writecount)) continue; if (!dqinit_needed(inode, type)) @@ -854,7 +891,7 @@ static void add_dquot_ref(struct super_block *sb, int type) spin_unlock(&inode_lock); iput(old_inode); - sb->dq_op->initialize(inode, type); + __dquot_initialize(inode, type); /* We hold a reference to 'inode' so it couldn't have been * removed from s_inodes list while we dropped the inode_lock. * We cannot iput the inode now as we can be holding the last @@ -865,6 +902,12 @@ static void add_dquot_ref(struct super_block *sb, int type) } spin_unlock(&inode_lock); iput(old_inode); + + if (reserved) { + printk(KERN_WARNING "VFS (%s): Writes happened before quota" + " was turned on thus quota information is probably " + "inconsistent. Please run quotacheck(8).\n", sb->s_id); + } } /* @@ -978,10 +1021,12 @@ static inline void dquot_resv_space(struct dquot *dquot, qsize_t number) /* * Claim reserved quota space */ -static void dquot_claim_reserved_space(struct dquot *dquot, - qsize_t number) +static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number) { - WARN_ON(dquot->dq_dqb.dqb_rsvspace < number); + if (dquot->dq_dqb.dqb_rsvspace < number) { + WARN_ON_ONCE(1); + number = dquot->dq_dqb.dqb_rsvspace; + } dquot->dq_dqb.dqb_curspace += number; dquot->dq_dqb.dqb_rsvspace -= number; } @@ -989,7 +1034,12 @@ static void dquot_claim_reserved_space(struct dquot *dquot, static inline void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) { - dquot->dq_dqb.dqb_rsvspace -= number; + if (dquot->dq_dqb.dqb_rsvspace >= number) + dquot->dq_dqb.dqb_rsvspace -= number; + else { + WARN_ON_ONCE(1); + dquot->dq_dqb.dqb_rsvspace = 0; + } } static void dquot_decr_inodes(struct dquot *dquot, qsize_t number) @@ -1131,13 +1181,13 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) *warntype = QUOTA_NL_NOWARN; if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) - return QUOTA_OK; + return 0; if (dquot->dq_dqb.dqb_ihardlimit && newinodes > dquot->dq_dqb.dqb_ihardlimit && !ignore_hardlimit(dquot)) { *warntype = QUOTA_NL_IHARDWARN; - return NO_QUOTA; + return -EDQUOT; } if (dquot->dq_dqb.dqb_isoftlimit && @@ -1146,7 +1196,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) get_seconds() >= dquot->dq_dqb.dqb_itime && !ignore_hardlimit(dquot)) { *warntype = QUOTA_NL_ISOFTLONGWARN; - return NO_QUOTA; + return -EDQUOT; } if (dquot->dq_dqb.dqb_isoftlimit && @@ -1157,7 +1207,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; } - return QUOTA_OK; + return 0; } /* needs dq_data_lock */ @@ -1169,7 +1219,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war *warntype = QUOTA_NL_NOWARN; if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) - return QUOTA_OK; + return 0; tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace + space; @@ -1179,7 +1229,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war !ignore_hardlimit(dquot)) { if (!prealloc) *warntype = QUOTA_NL_BHARDWARN; - return NO_QUOTA; + return -EDQUOT; } if (dquot->dq_dqb.dqb_bsoftlimit && @@ -1189,7 +1239,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war !ignore_hardlimit(dquot)) { if (!prealloc) *warntype = QUOTA_NL_BSOFTLONGWARN; - return NO_QUOTA; + return -EDQUOT; } if (dquot->dq_dqb.dqb_bsoftlimit && @@ -1205,10 +1255,10 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war * We don't allow preallocation to exceed softlimit so exceeding will * be always printed */ - return NO_QUOTA; + return -EDQUOT; } - return QUOTA_OK; + return 0; } static int info_idq_free(struct dquot *dquot, qsize_t inodes) @@ -1242,25 +1292,32 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space) return QUOTA_NL_BHARDBELOW; return QUOTA_NL_NOWARN; } + /* - * Initialize quota pointers in inode - * We do things in a bit complicated way but by that we avoid calling - * dqget() and thus filesystem callbacks under dqptr_sem. + * Initialize quota pointers in inode + * + * We do things in a bit complicated way but by that we avoid calling + * dqget() and thus filesystem callbacks under dqptr_sem. + * + * It is better to call this function outside of any transaction as it + * might need a lot of space in journal for dquot structure allocation. */ -int dquot_initialize(struct inode *inode, int type) +static void __dquot_initialize(struct inode *inode, int type) { unsigned int id = 0; - int cnt, ret = 0; - struct dquot *got[MAXQUOTAS] = { NULL, NULL }; + int cnt; + struct dquot *got[MAXQUOTAS]; struct super_block *sb = inode->i_sb; + qsize_t rsv; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (IS_NOQUOTA(inode)) - return 0; + if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) + return; /* First get references to structures we might need. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + got[cnt] = NULL; if (type != -1 && cnt != type) continue; switch (cnt) { @@ -1275,7 +1332,6 @@ int dquot_initialize(struct inode *inode, int type) } down_write(&sb_dqopt(sb)->dqptr_sem); - /* Having dqptr_sem we know NOQUOTA flags can't be altered... */ if (IS_NOQUOTA(inode)) goto out_err; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1287,20 +1343,31 @@ int dquot_initialize(struct inode *inode, int type) if (!inode->i_dquot[cnt]) { inode->i_dquot[cnt] = got[cnt]; got[cnt] = NULL; + /* + * Make quota reservation system happy if someone + * did a write before quota was turned on + */ + rsv = inode_get_rsv_space(inode); + if (unlikely(rsv)) + dquot_resv_space(inode->i_dquot[cnt], rsv); } } out_err: up_write(&sb_dqopt(sb)->dqptr_sem); /* Drop unused references */ dqput_all(got); - return ret; +} + +void dquot_initialize(struct inode *inode) +{ + __dquot_initialize(inode, -1); } EXPORT_SYMBOL(dquot_initialize); /* * Release all quotas referenced by inode */ -int dquot_drop(struct inode *inode) +static void __dquot_drop(struct inode *inode) { int cnt; struct dquot *put[MAXQUOTAS]; @@ -1312,32 +1379,31 @@ int dquot_drop(struct inode *inode) } up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); dqput_all(put); - return 0; } -EXPORT_SYMBOL(dquot_drop); -/* Wrapper to remove references to quota structures from inode */ -void vfs_dq_drop(struct inode *inode) -{ - /* Here we can get arbitrary inode from clear_inode() so we have - * to be careful. OTOH we don't need locking as quota operations - * are allowed to change only at mount time */ - if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op - && inode->i_sb->dq_op->drop) { - int cnt; - /* Test before calling to rule out calls from proc and such - * where we are not allowed to block. Note that this is - * actually reliable test even without the lock - the caller - * must assure that nobody can come after the DQUOT_DROP and - * add quota pointers back anyway */ - for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if (inode->i_dquot[cnt]) - break; - if (cnt < MAXQUOTAS) - inode->i_sb->dq_op->drop(inode); - } -} -EXPORT_SYMBOL(vfs_dq_drop); +void dquot_drop(struct inode *inode) +{ + int cnt; + + if (IS_NOQUOTA(inode)) + return; + + /* + * Test before calling to rule out calls from proc and such + * where we are not allowed to block. Note that this is + * actually reliable test even without the lock - the caller + * must assure that nobody can come after the DQUOT_DROP and + * add quota pointers back anyway. + */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (inode->i_dquot[cnt]) + break; + } + + if (cnt < MAXQUOTAS) + __dquot_drop(inode); +} +EXPORT_SYMBOL(dquot_drop); /* * inode_reserved_space is managed internally by quota, and protected by @@ -1351,28 +1417,30 @@ static qsize_t *inode_reserved_space(struct inode * inode) return inode->i_sb->dq_op->get_reserved_space(inode); } -static void inode_add_rsv_space(struct inode *inode, qsize_t number) +void inode_add_rsv_space(struct inode *inode, qsize_t number) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) += number; spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL(inode_add_rsv_space); - -static void inode_claim_rsv_space(struct inode *inode, qsize_t number) +void inode_claim_rsv_space(struct inode *inode, qsize_t number) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) -= number; __inode_add_bytes(inode, number); spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL(inode_claim_rsv_space); -static void inode_sub_rsv_space(struct inode *inode, qsize_t number) +void inode_sub_rsv_space(struct inode *inode, qsize_t number) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) -= number; spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL(inode_sub_rsv_space); static qsize_t inode_get_rsv_space(struct inode *inode) { @@ -1404,38 +1472,34 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve) } /* - * Following four functions update i_blocks+i_bytes fields and - * quota information (together with appropriate checks) - * NOTE: We absolutely rely on the fact that caller dirties - * the inode (usually macros in quotaops.h care about this) and - * holds a handle for the current transaction so that dquot write and - * inode write go into the same transaction. + * This functions updates i_blocks+i_bytes fields and quota information + * (together with appropriate checks). + * + * NOTE: We absolutely rely on the fact that caller dirties the inode + * (usually helpers in quotaops.h care about this) and holds a handle for + * the current transaction so that dquot write and inode write go into the + * same transaction. */ /* * This operation can block, but only after everything is updated */ int __dquot_alloc_space(struct inode *inode, qsize_t number, - int warn, int reserve) + int warn, int reserve) { - int cnt, ret = QUOTA_OK; + int cnt, ret = 0; char warntype[MAXQUOTAS]; /* * First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (IS_NOQUOTA(inode)) { + if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { inode_incr_space(inode, number, reserve); goto out; } down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - if (IS_NOQUOTA(inode)) { - inode_incr_space(inode, number, reserve); - goto out_unlock; - } - for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = QUOTA_NL_NOWARN; @@ -1443,9 +1507,9 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; - if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) - == NO_QUOTA) { - ret = NO_QUOTA; + ret = check_bdq(inode->i_dquot[cnt], number, !warn, + warntype+cnt); + if (ret) { spin_unlock(&dq_data_lock); goto out_flush_warn; } @@ -1466,61 +1530,45 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, mark_all_dquot_dirty(inode->i_dquot); out_flush_warn: flush_warnings(inode->i_dquot, warntype); -out_unlock: up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); out: return ret; } - -int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) -{ - return __dquot_alloc_space(inode, number, warn, 0); -} -EXPORT_SYMBOL(dquot_alloc_space); - -int dquot_reserve_space(struct inode *inode, qsize_t number, int warn) -{ - return __dquot_alloc_space(inode, number, warn, 1); -} -EXPORT_SYMBOL(dquot_reserve_space); +EXPORT_SYMBOL(__dquot_alloc_space); /* * This operation can block, but only after everything is updated */ -int dquot_alloc_inode(const struct inode *inode, qsize_t number) +int dquot_alloc_inode(const struct inode *inode) { - int cnt, ret = NO_QUOTA; + int cnt, ret = 0; char warntype[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (IS_NOQUOTA(inode)) - return QUOTA_OK; + if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) + return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = QUOTA_NL_NOWARN; down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - if (IS_NOQUOTA(inode)) { - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - return QUOTA_OK; - } spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; - if (check_idq(inode->i_dquot[cnt], number, warntype+cnt) - == NO_QUOTA) + ret = check_idq(inode->i_dquot[cnt], 1, warntype + cnt); + if (ret) goto warn_put_all; } for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; - dquot_incr_inodes(inode->i_dquot[cnt], number); + dquot_incr_inodes(inode->i_dquot[cnt], 1); } - ret = QUOTA_OK; + warn_put_all: spin_unlock(&dq_data_lock); - if (ret == QUOTA_OK) + if (ret == 0) mark_all_dquot_dirty(inode->i_dquot); flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); @@ -1528,23 +1576,19 @@ warn_put_all: } EXPORT_SYMBOL(dquot_alloc_inode); -int dquot_claim_space(struct inode *inode, qsize_t number) +/* + * Convert in-memory reserved quotas to real consumed quotas + */ +int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { int cnt; - int ret = QUOTA_OK; - if (IS_NOQUOTA(inode)) { + if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { inode_claim_rsv_space(inode, number); - goto out; + return 0; } down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - if (IS_NOQUOTA(inode)) { - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - inode_claim_rsv_space(inode, number); - goto out; - } - spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1557,33 +1601,26 @@ int dquot_claim_space(struct inode *inode, qsize_t number) spin_unlock(&dq_data_lock); mark_all_dquot_dirty(inode->i_dquot); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); -out: - return ret; + return 0; } -EXPORT_SYMBOL(dquot_claim_space); +EXPORT_SYMBOL(dquot_claim_space_nodirty); /* * This operation can block, but only after everything is updated */ -int __dquot_free_space(struct inode *inode, qsize_t number, int reserve) +void __dquot_free_space(struct inode *inode, qsize_t number, int reserve) { unsigned int cnt; char warntype[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (IS_NOQUOTA(inode)) { -out_sub: + if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { inode_decr_space(inode, number, reserve); - return QUOTA_OK; + return; } down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - /* Now recheck reliably when holding dqptr_sem */ - if (IS_NOQUOTA(inode)) { - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - goto out_sub; - } spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) @@ -1603,56 +1640,34 @@ out_sub: out_unlock: flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - return QUOTA_OK; -} - -int dquot_free_space(struct inode *inode, qsize_t number) -{ - return __dquot_free_space(inode, number, 0); } -EXPORT_SYMBOL(dquot_free_space); - -/* - * Release reserved quota space - */ -void dquot_release_reserved_space(struct inode *inode, qsize_t number) -{ - __dquot_free_space(inode, number, 1); - -} -EXPORT_SYMBOL(dquot_release_reserved_space); +EXPORT_SYMBOL(__dquot_free_space); /* * This operation can block, but only after everything is updated */ -int dquot_free_inode(const struct inode *inode, qsize_t number) +void dquot_free_inode(const struct inode *inode) { unsigned int cnt; char warntype[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (IS_NOQUOTA(inode)) - return QUOTA_OK; + if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) + return; down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - /* Now recheck reliably when holding dqptr_sem */ - if (IS_NOQUOTA(inode)) { - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - return QUOTA_OK; - } spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; - warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number); - dquot_decr_inodes(inode->i_dquot[cnt], number); + warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1); + dquot_decr_inodes(inode->i_dquot[cnt], 1); } spin_unlock(&dq_data_lock); mark_all_dquot_dirty(inode->i_dquot); flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - return QUOTA_OK; } EXPORT_SYMBOL(dquot_free_inode); @@ -1662,37 +1677,31 @@ EXPORT_SYMBOL(dquot_free_inode); * This operation can block, but only after everything is updated * A transaction must be started when entering this function. */ -int dquot_transfer(struct inode *inode, struct iattr *iattr) +static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask) { qsize_t space, cur_space; qsize_t rsv_space = 0; struct dquot *transfer_from[MAXQUOTAS]; struct dquot *transfer_to[MAXQUOTAS]; - int cnt, ret = QUOTA_OK; - int chuid = iattr->ia_valid & ATTR_UID && inode->i_uid != iattr->ia_uid, - chgid = iattr->ia_valid & ATTR_GID && inode->i_gid != iattr->ia_gid; + int cnt, ret = 0; char warntype_to[MAXQUOTAS]; char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) - return QUOTA_OK; + return 0; /* Initialize the arrays */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { transfer_from[cnt] = NULL; transfer_to[cnt] = NULL; warntype_to[cnt] = QUOTA_NL_NOWARN; } - if (chuid) - transfer_to[USRQUOTA] = dqget(inode->i_sb, iattr->ia_uid, - USRQUOTA); - if (chgid) - transfer_to[GRPQUOTA] = dqget(inode->i_sb, iattr->ia_gid, - GRPQUOTA); - + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (mask & (1 << cnt)) + transfer_to[cnt] = dqget(inode->i_sb, chid[cnt], cnt); + } down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); - /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); goto put_all; @@ -1706,9 +1715,11 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) if (!transfer_to[cnt]) continue; transfer_from[cnt] = inode->i_dquot[cnt]; - if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) == - NO_QUOTA || check_bdq(transfer_to[cnt], space, 0, - warntype_to + cnt) == NO_QUOTA) + ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt); + if (ret) + goto over_quota; + ret = check_bdq(transfer_to[cnt], space, 0, warntype_to + cnt); + if (ret) goto over_quota; } @@ -1762,22 +1773,32 @@ over_quota: /* Clear dquot pointers we don't want to dqput() */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) transfer_from[cnt] = NULL; - ret = NO_QUOTA; goto warn_put_all; } -EXPORT_SYMBOL(dquot_transfer); -/* Wrapper for transferring ownership of an inode */ -int vfs_dq_transfer(struct inode *inode, struct iattr *iattr) +/* Wrapper for transferring ownership of an inode for uid/gid only + * Called from FSXXX_setattr() + */ +int dquot_transfer(struct inode *inode, struct iattr *iattr) { + qid_t chid[MAXQUOTAS]; + unsigned long mask = 0; + + if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) { + mask |= 1 << USRQUOTA; + chid[USRQUOTA] = iattr->ia_uid; + } + if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) { + mask |= 1 << GRPQUOTA; + chid[GRPQUOTA] = iattr->ia_gid; + } if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) { - vfs_dq_init(inode); - if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) - return 1; + dquot_initialize(inode); + return __dquot_transfer(inode, chid, mask); } return 0; } -EXPORT_SYMBOL(vfs_dq_transfer); +EXPORT_SYMBOL(dquot_transfer); /* * Write info of quota file to disk @@ -1798,13 +1819,6 @@ EXPORT_SYMBOL(dquot_commit_info); * Definitions of diskquota operations. */ const struct dquot_operations dquot_operations = { - .initialize = dquot_initialize, - .drop = dquot_drop, - .alloc_space = dquot_alloc_space, - .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, - .free_inode = dquot_free_inode, - .transfer = dquot_transfer, .write_dquot = dquot_commit, .acquire_dquot = dquot_acquire, .release_dquot = dquot_release, @@ -1815,6 +1829,20 @@ const struct dquot_operations dquot_operations = { }; /* + * Generic helper for ->open on filesystems supporting disk quotas. + */ +int dquot_file_open(struct inode *inode, struct file *file) +{ + int error; + + error = generic_file_open(inode, file); + if (!error && (file->f_mode & FMODE_WRITE)) + dquot_initialize(inode); + return error; +} +EXPORT_SYMBOL(dquot_file_open); + +/* * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) */ int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags) @@ -1993,11 +2021,13 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, } if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { - /* As we bypass the pagecache we must now flush the inode so - * that we see all the changes from userspace... */ - write_inode_now(inode, 1); - /* And now flush the block cache so that kernel sees the - * changes */ + /* As we bypass the pagecache we must now flush all the + * dirty data and invalidate caches so that kernel sees + * changes from userspace. It is not enough to just flush + * the quota file since if blocksize < pagesize, invalidation + * of the cache could fail because of other unrelated dirty + * data */ + sync_filesystem(sb); invalidate_bdev(sb->s_bdev); } mutex_lock(&dqopt->dqonoff_mutex); @@ -2010,14 +2040,16 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, /* We don't want quota and atime on quota files (deadlocks * possible) Also nobody should write to the file - we use * special IO operations which ignore the immutable bit. */ - down_write(&dqopt->dqptr_sem); mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA); inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; mutex_unlock(&inode->i_mutex); - up_write(&dqopt->dqptr_sem); - sb->dq_op->drop(inode); + /* + * When S_NOQUOTA is set, remove dquot references as no more + * references can be added + */ + __dquot_drop(inode); } error = -EIO; @@ -2053,14 +2085,12 @@ out_file_init: iput(inode); out_lock: if (oldflags != -1) { - down_write(&dqopt->dqptr_sem); mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); /* Set the flags back (in the case of accidental quotaon() * on a wrong file we don't want to mess up the flags) */ inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE); inode->i_flags |= oldflags; mutex_unlock(&inode->i_mutex); - up_write(&dqopt->dqptr_sem); } mutex_unlock(&dqopt->dqonoff_mutex); out_fmt: diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c new file mode 100644 index 00000000000..2663ed90fb0 --- /dev/null +++ b/fs/quota/netlink.c @@ -0,0 +1,95 @@ + +#include <linux/cred.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/quotaops.h> +#include <linux/sched.h> +#include <net/netlink.h> +#include <net/genetlink.h> + +/* Netlink family structure for quota */ +static struct genl_family quota_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = "VFS_DQUOT", + .version = 1, + .maxattr = QUOTA_NL_A_MAX, +}; + +/** + * quota_send_warning - Send warning to userspace about exceeded quota + * @type: The quota type: USRQQUOTA, GRPQUOTA,... + * @id: The user or group id of the quota that was exceeded + * @dev: The device on which the fs is mounted (sb->s_dev) + * @warntype: The type of the warning: QUOTA_NL_... + * + * This can be used by filesystems (including those which don't use + * dquot) to send a message to userspace relating to quota limits. + * + */ + +void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype) +{ + static atomic_t seq; + struct sk_buff *skb; + void *msg_head; + int ret; + int msg_size = 4 * nla_total_size(sizeof(u32)) + + 2 * nla_total_size(sizeof(u64)); + + /* We have to allocate using GFP_NOFS as we are called from a + * filesystem performing write and thus further recursion into + * the fs to free some data could cause deadlocks. */ + skb = genlmsg_new(msg_size, GFP_NOFS); + if (!skb) { + printk(KERN_ERR + "VFS: Not enough memory to send quota warning.\n"); + return; + } + msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq), + "a_genl_family, 0, QUOTA_NL_C_WARNING); + if (!msg_head) { + printk(KERN_ERR + "VFS: Cannot store netlink header in quota warning.\n"); + goto err_out; + } + ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type); + if (ret) + goto attr_err_out; + ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev)); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev)); + if (ret) + goto attr_err_out; + ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); + if (ret) + goto attr_err_out; + genlmsg_end(skb, msg_head); + + genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); + return; +attr_err_out: + printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); +err_out: + kfree_skb(skb); +} +EXPORT_SYMBOL(quota_send_warning); + +static int __init quota_init(void) +{ + if (genl_register_family("a_genl_family) != 0) + printk(KERN_ERR + "VFS: Failed to create quota netlink interface.\n"); + return 0; +}; + +module_init(quota_init); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index ee91e275695..95388f9b735 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -10,7 +10,6 @@ #include <linux/slab.h> #include <asm/current.h> #include <asm/uaccess.h> -#include <linux/compat.h> #include <linux/kernel.h> #include <linux/security.h> #include <linux/syscalls.h> @@ -18,220 +17,205 @@ #include <linux/capability.h> #include <linux/quotaops.h> #include <linux/types.h> -#include <net/netlink.h> -#include <net/genetlink.h> +#include <linux/writeback.h> -/* Check validity of generic quotactl commands */ -static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, - qid_t id) +static int check_quotactl_permission(struct super_block *sb, int type, int cmd, + qid_t id) { - if (type >= MAXQUOTAS) - return -EINVAL; - if (!sb && cmd != Q_SYNC) - return -ENODEV; - /* Is operation supported? */ - if (sb && !sb->s_qcop) - return -ENOSYS; - switch (cmd) { - case Q_GETFMT: - break; - case Q_QUOTAON: - if (!sb->s_qcop->quota_on) - return -ENOSYS; - break; - case Q_QUOTAOFF: - if (!sb->s_qcop->quota_off) - return -ENOSYS; - break; - case Q_SETINFO: - if (!sb->s_qcop->set_info) - return -ENOSYS; - break; - case Q_GETINFO: - if (!sb->s_qcop->get_info) - return -ENOSYS; - break; - case Q_SETQUOTA: - if (!sb->s_qcop->set_dqblk) - return -ENOSYS; - break; - case Q_GETQUOTA: - if (!sb->s_qcop->get_dqblk) - return -ENOSYS; - break; - case Q_SYNC: - if (sb && !sb->s_qcop->quota_sync) - return -ENOSYS; + /* these commands do not require any special privilegues */ + case Q_GETFMT: + case Q_SYNC: + case Q_GETINFO: + case Q_XGETQSTAT: + case Q_XQUOTASYNC: + break; + /* allow to query information for dquots we "own" */ + case Q_GETQUOTA: + case Q_XGETQUOTA: + if ((type == USRQUOTA && current_euid() == id) || + (type == GRPQUOTA && in_egroup_p(id))) break; - default: - return -EINVAL; + /*FALLTHROUGH*/ + default: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; } - /* Is quota turned on for commands which need it? */ - switch (cmd) { - case Q_GETFMT: - case Q_GETINFO: - case Q_SETINFO: - case Q_SETQUOTA: - case Q_GETQUOTA: - /* This is just an informative test so we are satisfied - * without the lock */ - if (!sb_has_quota_active(sb, type)) - return -ESRCH; - } + return security_quotactl(cmd, type, id, sb); +} - /* Check privileges */ - if (cmd == Q_GETQUOTA) { - if (((type == USRQUOTA && current_euid() != id) || - (type == GRPQUOTA && !in_egroup_p(id))) && - !capable(CAP_SYS_ADMIN)) - return -EPERM; +static int quota_sync_all(int type) +{ + struct super_block *sb; + int ret; + + if (type >= MAXQUOTAS) + return -EINVAL; + ret = security_quotactl(Q_SYNC, type, 0, NULL); + if (ret) + return ret; + + spin_lock(&sb_lock); +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + if (!sb->s_qcop || !sb->s_qcop->quota_sync) + continue; + + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (sb->s_root) + sb->s_qcop->quota_sync(sb, type, 1); + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; } - else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO) - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + spin_unlock(&sb_lock); return 0; } -/* Check validity of XFS Quota Manager commands */ -static int xqm_quotactl_valid(struct super_block *sb, int type, int cmd, - qid_t id) +static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, + void __user *addr) { - if (type >= XQM_MAXQUOTAS) - return -EINVAL; - if (!sb) - return -ENODEV; - if (!sb->s_qcop) - return -ENOSYS; + char *pathname; + int ret = -ENOSYS; + + pathname = getname(addr); + if (IS_ERR(pathname)) + return PTR_ERR(pathname); + if (sb->s_qcop->quota_on) + ret = sb->s_qcop->quota_on(sb, type, id, pathname, 0); + putname(pathname); + return ret; +} - switch (cmd) { - case Q_XQUOTAON: - case Q_XQUOTAOFF: - case Q_XQUOTARM: - if (!sb->s_qcop->set_xstate) - return -ENOSYS; - break; - case Q_XGETQSTAT: - if (!sb->s_qcop->get_xstate) - return -ENOSYS; - break; - case Q_XSETQLIM: - if (!sb->s_qcop->set_xquota) - return -ENOSYS; - break; - case Q_XGETQUOTA: - if (!sb->s_qcop->get_xquota) - return -ENOSYS; - break; - case Q_XQUOTASYNC: - if (!sb->s_qcop->quota_sync) - return -ENOSYS; - break; - default: - return -EINVAL; - } +static int quota_getfmt(struct super_block *sb, int type, void __user *addr) +{ + __u32 fmt; - /* Check privileges */ - if (cmd == Q_XGETQUOTA) { - if (((type == XQM_USRQUOTA && current_euid() != id) || - (type == XQM_GRPQUOTA && !in_egroup_p(id))) && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - } else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) { - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + down_read(&sb_dqopt(sb)->dqptr_sem); + if (!sb_has_quota_active(sb, type)) { + up_read(&sb_dqopt(sb)->dqptr_sem); + return -ESRCH; } + fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id; + up_read(&sb_dqopt(sb)->dqptr_sem); + if (copy_to_user(addr, &fmt, sizeof(fmt))) + return -EFAULT; + return 0; +} +static int quota_getinfo(struct super_block *sb, int type, void __user *addr) +{ + struct if_dqinfo info; + int ret; + + if (!sb_has_quota_active(sb, type)) + return -ESRCH; + if (!sb->s_qcop->get_info) + return -ENOSYS; + ret = sb->s_qcop->get_info(sb, type, &info); + if (!ret && copy_to_user(addr, &info, sizeof(info))) + return -EFAULT; + return ret; +} + +static int quota_setinfo(struct super_block *sb, int type, void __user *addr) +{ + struct if_dqinfo info; + + if (copy_from_user(&info, addr, sizeof(info))) + return -EFAULT; + if (!sb_has_quota_active(sb, type)) + return -ESRCH; + if (!sb->s_qcop->set_info) + return -ENOSYS; + return sb->s_qcop->set_info(sb, type, &info); +} + +static int quota_getquota(struct super_block *sb, int type, qid_t id, + void __user *addr) +{ + struct if_dqblk idq; + int ret; + + if (!sb_has_quota_active(sb, type)) + return -ESRCH; + if (!sb->s_qcop->get_dqblk) + return -ENOSYS; + ret = sb->s_qcop->get_dqblk(sb, type, id, &idq); + if (ret) + return ret; + if (copy_to_user(addr, &idq, sizeof(idq))) + return -EFAULT; return 0; } -static int check_quotactl_valid(struct super_block *sb, int type, int cmd, - qid_t id) +static int quota_setquota(struct super_block *sb, int type, qid_t id, + void __user *addr) { - int error; - - if (XQM_COMMAND(cmd)) - error = xqm_quotactl_valid(sb, type, cmd, id); - else - error = generic_quotactl_valid(sb, type, cmd, id); - if (!error) - error = security_quotactl(cmd, type, id, sb); - return error; + struct if_dqblk idq; + + if (copy_from_user(&idq, addr, sizeof(idq))) + return -EFAULT; + if (!sb_has_quota_active(sb, type)) + return -ESRCH; + if (!sb->s_qcop->set_dqblk) + return -ENOSYS; + return sb->s_qcop->set_dqblk(sb, type, id, &idq); } -#ifdef CONFIG_QUOTA -void sync_quota_sb(struct super_block *sb, int type) +static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) { - int cnt; + __u32 flags; - if (!sb->s_qcop->quota_sync) - return; + if (copy_from_user(&flags, addr, sizeof(flags))) + return -EFAULT; + if (!sb->s_qcop->set_xstate) + return -ENOSYS; + return sb->s_qcop->set_xstate(sb, flags, cmd); +} - sb->s_qcop->quota_sync(sb, type); +static int quota_getxstate(struct super_block *sb, void __user *addr) +{ + struct fs_quota_stat fqs; + int ret; - if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE) - return; - /* This is not very clever (and fast) but currently I don't know about - * any other simple way of getting quota data to disk and we must get - * them there for userspace to be visible... */ - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); + if (!sb->s_qcop->get_xstate) + return -ENOSYS; + ret = sb->s_qcop->get_xstate(sb, &fqs); + if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) + return -EFAULT; + return ret; +} - /* - * Now when everything is written we can discard the pagecache so - * that userspace sees the changes. - */ - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (type != -1 && cnt != type) - continue; - if (!sb_has_quota_active(sb, cnt)) - continue; - mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, - I_MUTEX_QUOTA); - truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0); - mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex); - } - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); +static int quota_setxquota(struct super_block *sb, int type, qid_t id, + void __user *addr) +{ + struct fs_disk_quota fdq; + + if (copy_from_user(&fdq, addr, sizeof(fdq))) + return -EFAULT; + if (!sb->s_qcop->set_xquota) + return -ENOSYS; + return sb->s_qcop->set_xquota(sb, type, id, &fdq); } -#endif -static void sync_dquots(int type) +static int quota_getxquota(struct super_block *sb, int type, qid_t id, + void __user *addr) { - struct super_block *sb; - int cnt; + struct fs_disk_quota fdq; + int ret; - spin_lock(&sb_lock); -restart: - list_for_each_entry(sb, &super_blocks, s_list) { - /* This test just improves performance so it needn't be - * reliable... */ - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (type != -1 && type != cnt) - continue; - if (!sb_has_quota_active(sb, cnt)) - continue; - if (!info_dirty(&sb_dqopt(sb)->info[cnt]) && - list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list)) - continue; - break; - } - if (cnt == MAXQUOTAS) - continue; - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (sb->s_root) - sync_quota_sb(sb, type); - up_read(&sb->s_umount); - spin_lock(&sb_lock); - if (__put_super_and_need_restart(sb)) - goto restart; - } - spin_unlock(&sb_lock); + if (!sb->s_qcop->get_xquota) + return -ENOSYS; + ret = sb->s_qcop->get_xquota(sb, type, id, &fdq); + if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) + return -EFAULT; + return ret; } /* Copy parameters and call proper function */ @@ -240,117 +224,55 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, { int ret; + if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS)) + return -EINVAL; + if (!sb->s_qcop) + return -ENOSYS; + + ret = check_quotactl_permission(sb, type, cmd, id); + if (ret < 0) + return ret; + switch (cmd) { - case Q_QUOTAON: { - char *pathname; - - pathname = getname(addr); - if (IS_ERR(pathname)) - return PTR_ERR(pathname); - ret = sb->s_qcop->quota_on(sb, type, id, pathname, 0); - putname(pathname); - return ret; - } - case Q_QUOTAOFF: - return sb->s_qcop->quota_off(sb, type, 0); - - case Q_GETFMT: { - __u32 fmt; - - down_read(&sb_dqopt(sb)->dqptr_sem); - if (!sb_has_quota_active(sb, type)) { - up_read(&sb_dqopt(sb)->dqptr_sem); - return -ESRCH; - } - fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id; - up_read(&sb_dqopt(sb)->dqptr_sem); - if (copy_to_user(addr, &fmt, sizeof(fmt))) - return -EFAULT; - return 0; - } - case Q_GETINFO: { - struct if_dqinfo info; - - ret = sb->s_qcop->get_info(sb, type, &info); - if (ret) - return ret; - if (copy_to_user(addr, &info, sizeof(info))) - return -EFAULT; - return 0; - } - case Q_SETINFO: { - struct if_dqinfo info; - - if (copy_from_user(&info, addr, sizeof(info))) - return -EFAULT; - return sb->s_qcop->set_info(sb, type, &info); - } - case Q_GETQUOTA: { - struct if_dqblk idq; - - ret = sb->s_qcop->get_dqblk(sb, type, id, &idq); - if (ret) - return ret; - if (copy_to_user(addr, &idq, sizeof(idq))) - return -EFAULT; - return 0; - } - case Q_SETQUOTA: { - struct if_dqblk idq; - - if (copy_from_user(&idq, addr, sizeof(idq))) - return -EFAULT; - return sb->s_qcop->set_dqblk(sb, type, id, &idq); - } - case Q_SYNC: - if (sb) - sync_quota_sb(sb, type); - else - sync_dquots(type); - return 0; - - case Q_XQUOTAON: - case Q_XQUOTAOFF: - case Q_XQUOTARM: { - __u32 flags; - - if (copy_from_user(&flags, addr, sizeof(flags))) - return -EFAULT; - return sb->s_qcop->set_xstate(sb, flags, cmd); - } - case Q_XGETQSTAT: { - struct fs_quota_stat fqs; - - if ((ret = sb->s_qcop->get_xstate(sb, &fqs))) - return ret; - if (copy_to_user(addr, &fqs, sizeof(fqs))) - return -EFAULT; - return 0; - } - case Q_XSETQLIM: { - struct fs_disk_quota fdq; - - if (copy_from_user(&fdq, addr, sizeof(fdq))) - return -EFAULT; - return sb->s_qcop->set_xquota(sb, type, id, &fdq); - } - case Q_XGETQUOTA: { - struct fs_disk_quota fdq; - - ret = sb->s_qcop->get_xquota(sb, type, id, &fdq); - if (ret) - return ret; - if (copy_to_user(addr, &fdq, sizeof(fdq))) - return -EFAULT; - return 0; - } - case Q_XQUOTASYNC: - return sb->s_qcop->quota_sync(sb, type); - /* We never reach here unless validity check is broken */ - default: - BUG(); + case Q_QUOTAON: + return quota_quotaon(sb, type, cmd, id, addr); + case Q_QUOTAOFF: + if (!sb->s_qcop->quota_off) + return -ENOSYS; + return sb->s_qcop->quota_off(sb, type, 0); + case Q_GETFMT: + return quota_getfmt(sb, type, addr); + case Q_GETINFO: + return quota_getinfo(sb, type, addr); + case Q_SETINFO: + return quota_setinfo(sb, type, addr); + case Q_GETQUOTA: + return quota_getquota(sb, type, id, addr); + case Q_SETQUOTA: + return quota_setquota(sb, type, id, addr); + case Q_SYNC: + if (!sb->s_qcop->quota_sync) + return -ENOSYS; + return sb->s_qcop->quota_sync(sb, type, 1); + case Q_XQUOTAON: + case Q_XQUOTAOFF: + case Q_XQUOTARM: + return quota_setxstate(sb, cmd, addr); + case Q_XGETQSTAT: + return quota_getxstate(sb, addr); + case Q_XSETQLIM: + return quota_setxquota(sb, type, id, addr); + case Q_XGETQUOTA: + return quota_getxquota(sb, type, id, addr); + case Q_XQUOTASYNC: + /* caller already holds s_umount */ + if (sb->s_flags & MS_RDONLY) + return -EROFS; + writeback_inodes_sb(sb); + return 0; + default: + return -EINVAL; } - return 0; } /* @@ -397,224 +319,23 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, cmds = cmd >> SUBCMDSHIFT; type = cmd & SUBCMDMASK; - if (cmds != Q_SYNC || special) { - sb = quotactl_block(special); - if (IS_ERR(sb)) - return PTR_ERR(sb); + /* + * As a special case Q_SYNC can be called without a specific device. + * It will iterate all superblocks that have quota enabled and call + * the sync action on each of them. + */ + if (!special) { + if (cmds == Q_SYNC) + return quota_sync_all(type); + return -ENODEV; } - ret = check_quotactl_valid(sb, type, cmds, id); - if (ret >= 0) - ret = do_quotactl(sb, type, cmds, id, addr); - if (sb) - drop_super(sb); + sb = quotactl_block(special); + if (IS_ERR(sb)) + return PTR_ERR(sb); - return ret; -} - -#if defined(CONFIG_COMPAT_FOR_U64_ALIGNMENT) -/* - * This code works only for 32 bit quota tools over 64 bit OS (x86_64, ia64) - * and is necessary due to alignment problems. - */ -struct compat_if_dqblk { - compat_u64 dqb_bhardlimit; - compat_u64 dqb_bsoftlimit; - compat_u64 dqb_curspace; - compat_u64 dqb_ihardlimit; - compat_u64 dqb_isoftlimit; - compat_u64 dqb_curinodes; - compat_u64 dqb_btime; - compat_u64 dqb_itime; - compat_uint_t dqb_valid; -}; - -/* XFS structures */ -struct compat_fs_qfilestat { - compat_u64 dqb_bhardlimit; - compat_u64 qfs_nblks; - compat_uint_t qfs_nextents; -}; - -struct compat_fs_quota_stat { - __s8 qs_version; - __u16 qs_flags; - __s8 qs_pad; - struct compat_fs_qfilestat qs_uquota; - struct compat_fs_qfilestat qs_gquota; - compat_uint_t qs_incoredqs; - compat_int_t qs_btimelimit; - compat_int_t qs_itimelimit; - compat_int_t qs_rtbtimelimit; - __u16 qs_bwarnlimit; - __u16 qs_iwarnlimit; -}; - -asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, - qid_t id, void __user *addr) -{ - unsigned int cmds; - struct if_dqblk __user *dqblk; - struct compat_if_dqblk __user *compat_dqblk; - struct fs_quota_stat __user *fsqstat; - struct compat_fs_quota_stat __user *compat_fsqstat; - compat_uint_t data; - u16 xdata; - long ret; + ret = do_quotactl(sb, type, cmds, id, addr); - cmds = cmd >> SUBCMDSHIFT; - - switch (cmds) { - case Q_GETQUOTA: - dqblk = compat_alloc_user_space(sizeof(struct if_dqblk)); - compat_dqblk = addr; - ret = sys_quotactl(cmd, special, id, dqblk); - if (ret) - break; - if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) || - get_user(data, &dqblk->dqb_valid) || - put_user(data, &compat_dqblk->dqb_valid)) - ret = -EFAULT; - break; - case Q_SETQUOTA: - dqblk = compat_alloc_user_space(sizeof(struct if_dqblk)); - compat_dqblk = addr; - ret = -EFAULT; - if (copy_in_user(dqblk, compat_dqblk, sizeof(*compat_dqblk)) || - get_user(data, &compat_dqblk->dqb_valid) || - put_user(data, &dqblk->dqb_valid)) - break; - ret = sys_quotactl(cmd, special, id, dqblk); - break; - case Q_XGETQSTAT: - fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat)); - compat_fsqstat = addr; - ret = sys_quotactl(cmd, special, id, fsqstat); - if (ret) - break; - ret = -EFAULT; - /* Copying qs_version, qs_flags, qs_pad */ - if (copy_in_user(compat_fsqstat, fsqstat, - offsetof(struct compat_fs_quota_stat, qs_uquota))) - break; - /* Copying qs_uquota */ - if (copy_in_user(&compat_fsqstat->qs_uquota, - &fsqstat->qs_uquota, - sizeof(compat_fsqstat->qs_uquota)) || - get_user(data, &fsqstat->qs_uquota.qfs_nextents) || - put_user(data, &compat_fsqstat->qs_uquota.qfs_nextents)) - break; - /* Copying qs_gquota */ - if (copy_in_user(&compat_fsqstat->qs_gquota, - &fsqstat->qs_gquota, - sizeof(compat_fsqstat->qs_gquota)) || - get_user(data, &fsqstat->qs_gquota.qfs_nextents) || - put_user(data, &compat_fsqstat->qs_gquota.qfs_nextents)) - break; - /* Copying the rest */ - if (copy_in_user(&compat_fsqstat->qs_incoredqs, - &fsqstat->qs_incoredqs, - sizeof(struct compat_fs_quota_stat) - - offsetof(struct compat_fs_quota_stat, qs_incoredqs)) || - get_user(xdata, &fsqstat->qs_iwarnlimit) || - put_user(xdata, &compat_fsqstat->qs_iwarnlimit)) - break; - ret = 0; - break; - default: - ret = sys_quotactl(cmd, special, id, addr); - } + drop_super(sb); return ret; } -#endif - - -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE - -/* Netlink family structure for quota */ -static struct genl_family quota_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = "VFS_DQUOT", - .version = 1, - .maxattr = QUOTA_NL_A_MAX, -}; - -/** - * quota_send_warning - Send warning to userspace about exceeded quota - * @type: The quota type: USRQQUOTA, GRPQUOTA,... - * @id: The user or group id of the quota that was exceeded - * @dev: The device on which the fs is mounted (sb->s_dev) - * @warntype: The type of the warning: QUOTA_NL_... - * - * This can be used by filesystems (including those which don't use - * dquot) to send a message to userspace relating to quota limits. - * - */ - -void quota_send_warning(short type, unsigned int id, dev_t dev, - const char warntype) -{ - static atomic_t seq; - struct sk_buff *skb; - void *msg_head; - int ret; - int msg_size = 4 * nla_total_size(sizeof(u32)) + - 2 * nla_total_size(sizeof(u64)); - - /* We have to allocate using GFP_NOFS as we are called from a - * filesystem performing write and thus further recursion into - * the fs to free some data could cause deadlocks. */ - skb = genlmsg_new(msg_size, GFP_NOFS); - if (!skb) { - printk(KERN_ERR - "VFS: Not enough memory to send quota warning.\n"); - return; - } - msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq), - "a_genl_family, 0, QUOTA_NL_C_WARNING); - if (!msg_head) { - printk(KERN_ERR - "VFS: Cannot store netlink header in quota warning.\n"); - goto err_out; - } - ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type); - if (ret) - goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev)); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev)); - if (ret) - goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); - if (ret) - goto attr_err_out; - genlmsg_end(skb, msg_head); - - genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); - return; -attr_err_out: - printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); -err_out: - kfree_skb(skb); -} -EXPORT_SYMBOL(quota_send_warning); - -static int __init quota_init(void) -{ - if (genl_register_family("a_genl_family) != 0) - printk(KERN_ERR - "VFS: Failed to create quota netlink interface.\n"); - return 0; -}; - -module_init(quota_init); -#endif - diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 65c87276117..dc014f7def0 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -425,7 +425,7 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th, journal_mark_dirty(th, s, sbh); if (for_unformatted) - vfs_dq_free_block_nodirty(inode, 1); + dquot_free_block_nodirty(inode, 1); } void reiserfs_free_block(struct reiserfs_transaction_handle *th, @@ -1049,7 +1049,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start amount_needed, hint->inode->i_uid); #endif quota_ret = - vfs_dq_alloc_block_nodirty(hint->inode, amount_needed); + dquot_alloc_block_nodirty(hint->inode, amount_needed); if (quota_ret) /* Quota exceeded? */ return QUOTA_EXCEEDED; if (hint->preallocate && hint->prealloc_size) { @@ -1058,7 +1058,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start "reiserquota: allocating (prealloc) %d blocks id=%u", hint->prealloc_size, hint->inode->i_uid); #endif - quota_ret = vfs_dq_prealloc_block_nodirty(hint->inode, + quota_ret = dquot_prealloc_block_nodirty(hint->inode, hint->prealloc_size); if (quota_ret) hint->preallocate = hint->prealloc_size = 0; @@ -1092,7 +1092,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start hint->inode->i_uid); #endif /* Free not allocated blocks */ - vfs_dq_free_block_nodirty(hint->inode, + dquot_free_block_nodirty(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); } @@ -1125,7 +1125,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start REISERFS_I(hint->inode)->i_prealloc_count, hint->inode->i_uid); #endif - vfs_dq_free_block_nodirty(hint->inode, amount_needed + + dquot_free_block_nodirty(hint->inode, amount_needed + hint->prealloc_size - nr_allocated - REISERFS_I(hint->inode)-> i_prealloc_count); diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index da2dba082e2..1d9c12714c5 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -289,7 +289,7 @@ const struct file_operations reiserfs_file_operations = { .compat_ioctl = reiserfs_compat_ioctl, #endif .mmap = reiserfs_file_mmap, - .open = generic_file_open, + .open = dquot_file_open, .release = reiserfs_file_release, .fsync = reiserfs_sync_file, .aio_read = generic_file_aio_read, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 2df0f5c7c60..d1da94b82d8 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -34,6 +34,9 @@ void reiserfs_delete_inode(struct inode *inode) int depth; int err; + if (!is_bad_inode(inode)) + dquot_initialize(inode); + truncate_inode_pages(&inode->i_data, 0); depth = reiserfs_write_lock_once(inode->i_sb); @@ -54,7 +57,7 @@ void reiserfs_delete_inode(struct inode *inode) * after delete_object so that quota updates go into the same transaction as * stat data deletion */ if (!err) - vfs_dq_free_inode(inode); + dquot_free_inode(inode); if (journal_end(&th, inode->i_sb, jbegin_count)) goto out; @@ -1615,7 +1618,7 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, ** to properly mark inodes for datasync and such, but only actually ** does something when called for a synchronous update. */ -int reiserfs_write_inode(struct inode *inode, int do_sync) +int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct reiserfs_transaction_handle th; int jbegin_count = 1; @@ -1627,7 +1630,7 @@ int reiserfs_write_inode(struct inode *inode, int do_sync) ** inode needs to reach disk for safety, and they can safely be ** ignored because the altered inode has already been logged. */ - if (do_sync && !(current->flags & PF_MEMALLOC)) { + if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { reiserfs_write_lock(inode->i_sb); if (!journal_begin(&th, inode->i_sb, jbegin_count)) { reiserfs_update_sd(&th, inode); @@ -1765,10 +1768,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, BUG_ON(!th->t_trans_id); - if (vfs_dq_alloc_inode(inode)) { - err = -EDQUOT; + dquot_initialize(inode); + err = dquot_alloc_inode(inode); + if (err) goto out_end_trans; - } if (!dir->i_nlink) { err = -EPERM; goto out_bad_inode; @@ -1959,12 +1962,12 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, INODE_PKEY(inode)->k_objectid = 0; /* Quota change must be inside a transaction for journaling */ - vfs_dq_free_inode(inode); + dquot_free_inode(inode); out_end_trans: journal_end(th, th->t_super, th->t_blocks_allocated); /* Drop can be outside and it needs more credits so it's better to have it outside */ - vfs_dq_drop(inode); + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; make_bad_inode(inode); @@ -3073,6 +3076,8 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) depth = reiserfs_write_lock_once(inode->i_sb); if (attr->ia_valid & ATTR_SIZE) { + dquot_initialize(inode); + /* version 2 items will be caught by the s_maxbytes check ** done for us in vmtruncate */ @@ -3134,8 +3139,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) jbegin_count); if (error) goto out; - error = - vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; + error = dquot_transfer(inode, attr); if (error) { journal_end(&th, inode->i_sb, jbegin_count); diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 9d4dcf0b07c..96e4cbbfaa1 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -546,7 +546,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, */ static int drop_new_inode(struct inode *inode) { - vfs_dq_drop(inode); + dquot_drop(inode); make_bad_inode(inode); inode->i_flags |= S_NOQUOTA; iput(inode); @@ -554,7 +554,7 @@ static int drop_new_inode(struct inode *inode) } /* utility function that does setup for reiserfs_new_inode. -** vfs_dq_init needs lots of credits so it's better to have it +** dquot_initialize needs lots of credits so it's better to have it ** outside of a transaction, so we had to pull some bits of ** reiserfs_new_inode out into this func. */ @@ -577,7 +577,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode) } else { inode->i_gid = current_fsgid(); } - vfs_dq_init(inode); + dquot_initialize(inode); return 0; } @@ -594,6 +594,8 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode, struct reiserfs_transaction_handle th; struct reiserfs_security_handle security; + dquot_initialize(dir); + if (!(inode = new_inode(dir->i_sb))) { return -ENOMEM; } @@ -666,6 +668,8 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode, if (!new_valid_dev(rdev)) return -EINVAL; + dquot_initialize(dir); + if (!(inode = new_inode(dir->i_sb))) { return -ENOMEM; } @@ -739,6 +743,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); + dquot_initialize(dir); + #ifdef DISPLACE_NEW_PACKING_LOCALITIES /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ REISERFS_I(dir)->new_packing_locality = 1; @@ -842,6 +848,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); + dquot_initialize(dir); + reiserfs_write_lock(dir->i_sb); retval = journal_begin(&th, dir->i_sb, jbegin_count); if (retval) @@ -923,6 +931,8 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) unsigned long savelink; int depth; + dquot_initialize(dir); + inode = dentry->d_inode; /* in this transaction we can be doing at max two balancings and update @@ -1024,6 +1034,8 @@ static int reiserfs_symlink(struct inode *parent_dir, 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) + REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb)); + dquot_initialize(parent_dir); + if (!(inode = new_inode(parent_dir->i_sb))) { return -ENOMEM; } @@ -1111,6 +1123,8 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); + dquot_initialize(dir); + reiserfs_write_lock(dir->i_sb); if (inode->i_nlink >= REISERFS_LINK_MAX) { //FIXME: sd_nlink is 32 bit for new files @@ -1235,6 +1249,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); + dquot_initialize(old_dir); + dquot_initialize(new_dir); + old_inode = old_dentry->d_inode; new_dentry_inode = new_dentry->d_inode; diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 5fa7118f04e..313d39d639e 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -1299,7 +1299,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, inode->i_uid, head2type(&s_ih)); #endif - vfs_dq_free_space_nodirty(inode, quota_cut_bytes); + dquot_free_space_nodirty(inode, quota_cut_bytes); /* Return deleted body length */ return ret_value; @@ -1383,7 +1383,7 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, quota_cut_bytes, inode->i_uid, key2type(key)); #endif - vfs_dq_free_space_nodirty(inode, + dquot_free_space_nodirty(inode, quota_cut_bytes); } break; @@ -1733,7 +1733,7 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, inode->i_uid, '?'); #endif - vfs_dq_free_space_nodirty(inode, quota_cut_bytes); + dquot_free_space_nodirty(inode, quota_cut_bytes); return ret_value; } @@ -1968,9 +1968,10 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree key2type(&(key->on_disk_key))); #endif - if (vfs_dq_alloc_space_nodirty(inode, pasted_size)) { + retval = dquot_alloc_space_nodirty(inode, pasted_size); + if (retval) { pathrelse(search_path); - return -EDQUOT; + return retval; } init_tb_struct(th, &s_paste_balance, th->t_super, search_path, pasted_size); @@ -2024,7 +2025,7 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree pasted_size, inode->i_uid, key2type(&(key->on_disk_key))); #endif - vfs_dq_free_space_nodirty(inode, pasted_size); + dquot_free_space_nodirty(inode, pasted_size); return retval; } @@ -2062,9 +2063,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, #endif /* We can't dirty inode here. It would be immediately written but * appropriate stat item isn't inserted yet... */ - if (vfs_dq_alloc_space_nodirty(inode, quota_bytes)) { + retval = dquot_alloc_space_nodirty(inode, quota_bytes); + if (retval) { pathrelse(path); - return -EDQUOT; + return retval; } } init_tb_struct(th, &s_ins_balance, th->t_super, path, @@ -2113,6 +2115,6 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, quota_bytes, inode->i_uid, head2type(ih)); #endif if (inode) - vfs_dq_free_space_nodirty(inode, quota_bytes); + dquot_free_space_nodirty(inode, quota_bytes); return retval; } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b4a7dd03bdb..04bf5d791bd 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -246,7 +246,7 @@ static int finish_unfinished(struct super_block *s) retval = remove_save_link_only(s, &save_link_key, 0); continue; } - vfs_dq_init(inode); + dquot_initialize(inode); if (truncate && S_ISDIR(inode->i_mode)) { /* We got a truncate request for a dir which is impossible. @@ -578,6 +578,11 @@ out: reiserfs_write_unlock_once(inode->i_sb, lock_depth); } +static void reiserfs_clear_inode(struct inode *inode) +{ + dquot_drop(inode); +} + #ifdef CONFIG_QUOTA static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, size_t, loff_t); @@ -590,6 +595,7 @@ static const struct super_operations reiserfs_sops = { .destroy_inode = reiserfs_destroy_inode, .write_inode = reiserfs_write_inode, .dirty_inode = reiserfs_dirty_inode, + .clear_inode = reiserfs_clear_inode, .delete_inode = reiserfs_delete_inode, .put_super = reiserfs_put_super, .write_super = reiserfs_write_super, @@ -616,13 +622,6 @@ static int reiserfs_write_info(struct super_block *, int); static int reiserfs_quota_on(struct super_block *, int, int, char *, int); static const struct dquot_operations reiserfs_quota_operations = { - .initialize = dquot_initialize, - .drop = dquot_drop, - .alloc_space = dquot_alloc_space, - .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, - .free_inode = dquot_free_inode, - .transfer = dquot_transfer, .write_dquot = reiserfs_write_dquot, .acquire_dquot = reiserfs_acquire_dquot, .release_dquot = reiserfs_release_dquot, diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 81f09fab8ae..37d034ca7d9 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -61,7 +61,6 @@ static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) { BUG_ON(!mutex_is_locked(&dir->i_mutex)); - vfs_dq_init(dir); return dir->i_op->create(dir, dentry, mode, NULL); } #endif @@ -69,7 +68,6 @@ static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode) { BUG_ON(!mutex_is_locked(&dir->i_mutex)); - vfs_dq_init(dir); return dir->i_op->mkdir(dir, dentry, mode); } @@ -81,7 +79,6 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry) { int error; BUG_ON(!mutex_is_locked(&dir->i_mutex)); - vfs_dq_init(dir); reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, I_MUTEX_CHILD, dir->i_sb); @@ -97,7 +94,6 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) { int error; BUG_ON(!mutex_is_locked(&dir->i_mutex)); - vfs_dq_init(dir); reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, I_MUTEX_CHILD, dir->i_sb); diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 70e3244fa30..df8a19ef870 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o -squashfs-y += namei.o super.o symlink.o +squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 2a796031034..1cb0d81b164 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -29,15 +29,14 @@ #include <linux/fs.h> #include <linux/vfs.h> #include <linux/slab.h> -#include <linux/mutex.h> #include <linux/string.h> #include <linux/buffer_head.h> -#include <linux/zlib.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs_fs_i.h" #include "squashfs.h" +#include "decompressor.h" /* * Read the metadata block length, this is stored in the first two @@ -153,72 +152,10 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, } if (compressed) { - int zlib_err = 0, zlib_init = 0; - - /* - * Uncompress block. - */ - - mutex_lock(&msblk->read_data_mutex); - - msblk->stream.avail_out = 0; - msblk->stream.avail_in = 0; - - bytes = length; - do { - if (msblk->stream.avail_in == 0 && k < b) { - avail = min(bytes, msblk->devblksize - offset); - bytes -= avail; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto release_mutex; - - if (avail == 0) { - offset = 0; - put_bh(bh[k++]); - continue; - } - - msblk->stream.next_in = bh[k]->b_data + offset; - msblk->stream.avail_in = avail; - offset = 0; - } - - if (msblk->stream.avail_out == 0 && page < pages) { - msblk->stream.next_out = buffer[page++]; - msblk->stream.avail_out = PAGE_CACHE_SIZE; - } - - if (!zlib_init) { - zlib_err = zlib_inflateInit(&msblk->stream); - if (zlib_err != Z_OK) { - ERROR("zlib_inflateInit returned" - " unexpected result 0x%x," - " srclength %d\n", zlib_err, - srclength); - goto release_mutex; - } - zlib_init = 1; - } - - zlib_err = zlib_inflate(&msblk->stream, Z_SYNC_FLUSH); - - if (msblk->stream.avail_in == 0 && k < b) - put_bh(bh[k++]); - } while (zlib_err == Z_OK); - - if (zlib_err != Z_STREAM_END) { - ERROR("zlib_inflate error, data probably corrupt\n"); - goto release_mutex; - } - - zlib_err = zlib_inflateEnd(&msblk->stream); - if (zlib_err != Z_OK) { - ERROR("zlib_inflate error, data probably corrupt\n"); - goto release_mutex; - } - length = msblk->stream.total_out; - mutex_unlock(&msblk->read_data_mutex); + length = squashfs_decompress(msblk, buffer, bh, b, offset, + length, srclength, pages); + if (length < 0) + goto read_failure; } else { /* * Block is uncompressed. @@ -255,9 +192,6 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, kfree(bh); return length; -release_mutex: - mutex_unlock(&msblk->read_data_mutex); - block_release: for (; k < b; k++) put_bh(bh[k]); diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 40c98fa6b5d..57314bee905 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -51,7 +51,6 @@ #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/wait.h> -#include <linux/zlib.h> #include <linux/pagemap.h> #include "squashfs_fs.h" diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c new file mode 100644 index 00000000000..157478da6ac --- /dev/null +++ b/fs/squashfs/decompressor.c @@ -0,0 +1,68 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * decompressor.c + */ + +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/buffer_head.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file (and decompressor.h) implements a decompressor framework for + * Squashfs, allowing multiple decompressors to be easily supported + */ + +static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { + NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 +}; + +static const struct squashfs_decompressor squashfs_lzo_unsupported_comp_ops = { + NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 +}; + +static const struct squashfs_decompressor squashfs_unknown_comp_ops = { + NULL, NULL, NULL, 0, "unknown", 0 +}; + +static const struct squashfs_decompressor *decompressor[] = { + &squashfs_zlib_comp_ops, + &squashfs_lzma_unsupported_comp_ops, + &squashfs_lzo_unsupported_comp_ops, + &squashfs_unknown_comp_ops +}; + + +const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) +{ + int i; + + for (i = 0; decompressor[i]->id; i++) + if (id == decompressor[i]->id) + break; + + return decompressor[i]; +} diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h new file mode 100644 index 00000000000..7425f80783f --- /dev/null +++ b/fs/squashfs/decompressor.h @@ -0,0 +1,55 @@ +#ifndef DECOMPRESSOR_H +#define DECOMPRESSOR_H +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * decompressor.h + */ + +struct squashfs_decompressor { + void *(*init)(struct squashfs_sb_info *); + void (*free)(void *); + int (*decompress)(struct squashfs_sb_info *, void **, + struct buffer_head **, int, int, int, int, int); + int id; + char *name; + int supported; +}; + +static inline void *squashfs_decompressor_init(struct squashfs_sb_info *msblk) +{ + return msblk->decompressor->init(msblk); +} + +static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, + void *s) +{ + if (msblk->decompressor) + msblk->decompressor->free(s); +} + +static inline int squashfs_decompress(struct squashfs_sb_info *msblk, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) +{ + return msblk->decompressor->decompress(msblk, buffer, bh, b, offset, + length, srclength, pages); +} +#endif diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index 566b0eaed86..12b933ac658 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c @@ -30,7 +30,6 @@ #include <linux/fs.h> #include <linux/vfs.h> #include <linux/slab.h> -#include <linux/zlib.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index 2b1b8fe5e03..7f93d5a9ee0 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c @@ -39,7 +39,6 @@ #include <linux/vfs.h> #include <linux/dcache.h> #include <linux/exportfs.h> -#include <linux/zlib.h> #include <linux/slab.h> #include "squashfs_fs.h" diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 717767d831d..a25c5060bdc 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -47,7 +47,6 @@ #include <linux/string.h> #include <linux/pagemap.h> #include <linux/mutex.h> -#include <linux/zlib.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index b5a2c15bbbc..7c90bbd6879 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -36,7 +36,6 @@ #include <linux/fs.h> #include <linux/vfs.h> #include <linux/slab.h> -#include <linux/zlib.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c index 3795b837ba2..b7f64bcd2b7 100644 --- a/fs/squashfs/id.c +++ b/fs/squashfs/id.c @@ -34,7 +34,6 @@ #include <linux/fs.h> #include <linux/vfs.h> #include <linux/slab.h> -#include <linux/zlib.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index 9101dbde39e..49daaf669e4 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -40,7 +40,6 @@ #include <linux/fs.h> #include <linux/vfs.h> -#include <linux/zlib.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index 9e398653b22..5266bd8ad93 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c @@ -57,7 +57,6 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/dcache.h> -#include <linux/zlib.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 0e9feb6adf7..fe2587af551 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -51,6 +51,9 @@ extern struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *, u64, int); extern int squashfs_read_table(struct super_block *, void *, u64, int); +/* decompressor.c */ +extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); + /* export.c */ extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, unsigned int); @@ -71,7 +74,7 @@ extern struct inode *squashfs_iget(struct super_block *, long long, extern int squashfs_read_inode(struct inode *, long long); /* - * Inodes and files operations + * Inodes, files and decompressor operations */ /* dir.c */ @@ -88,3 +91,6 @@ extern const struct inode_operations squashfs_dir_inode_ops; /* symlink.c */ extern const struct address_space_operations squashfs_symlink_aops; + +/* zlib_wrapper.c */ +extern const struct squashfs_decompressor squashfs_zlib_comp_ops; diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 283daafc568..79024245ea0 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -183,8 +183,6 @@ #define SQUASHFS_MAX_FILE_SIZE (1LL << \ (SQUASHFS_MAX_FILE_SIZE_LOG - 2)) -#define SQUASHFS_MARKER_BYTE 0xff - /* meta index cache */ #define SQUASHFS_META_INDEXES (SQUASHFS_METADATA_SIZE / sizeof(unsigned int)) #define SQUASHFS_META_ENTRIES 127 @@ -211,7 +209,9 @@ struct meta_index { /* * definitions for structures on disk */ -#define ZLIB_COMPRESSION 1 +#define ZLIB_COMPRESSION 1 +#define LZMA_COMPRESSION 2 +#define LZO_COMPRESSION 3 struct squashfs_super_block { __le32 s_magic; diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index c8c65614dd1..2e77dc547e2 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -52,25 +52,25 @@ struct squashfs_cache_entry { }; struct squashfs_sb_info { - int devblksize; - int devblksize_log2; - struct squashfs_cache *block_cache; - struct squashfs_cache *fragment_cache; - struct squashfs_cache *read_page; - int next_meta_index; - __le64 *id_table; - __le64 *fragment_index; - unsigned int *fragment_index_2; - struct mutex read_data_mutex; - struct mutex meta_index_mutex; - struct meta_index *meta_index; - z_stream stream; - __le64 *inode_lookup_table; - u64 inode_table; - u64 directory_table; - unsigned int block_size; - unsigned short block_log; - long long bytes_used; - unsigned int inodes; + const struct squashfs_decompressor *decompressor; + int devblksize; + int devblksize_log2; + struct squashfs_cache *block_cache; + struct squashfs_cache *fragment_cache; + struct squashfs_cache *read_page; + int next_meta_index; + __le64 *id_table; + __le64 *fragment_index; + struct mutex read_data_mutex; + struct mutex meta_index_mutex; + struct meta_index *meta_index; + void *stream; + __le64 *inode_lookup_table; + u64 inode_table; + u64 directory_table; + unsigned int block_size; + unsigned short block_log; + long long bytes_used; + unsigned int inodes; }; #endif diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 6c197ef53ad..3550aec2f65 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -35,34 +35,41 @@ #include <linux/pagemap.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/zlib.h> #include <linux/magic.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs_fs_i.h" #include "squashfs.h" +#include "decompressor.h" static struct file_system_type squashfs_fs_type; static const struct super_operations squashfs_super_ops; -static int supported_squashfs_filesystem(short major, short minor, short comp) +static const struct squashfs_decompressor *supported_squashfs_filesystem(short + major, short minor, short id) { + const struct squashfs_decompressor *decompressor; + if (major < SQUASHFS_MAJOR) { ERROR("Major/Minor mismatch, older Squashfs %d.%d " "filesystems are unsupported\n", major, minor); - return -EINVAL; + return NULL; } else if (major > SQUASHFS_MAJOR || minor > SQUASHFS_MINOR) { ERROR("Major/Minor mismatch, trying to mount newer " "%d.%d filesystem\n", major, minor); ERROR("Please update your kernel\n"); - return -EINVAL; + return NULL; } - if (comp != ZLIB_COMPRESSION) - return -EINVAL; + decompressor = squashfs_lookup_decompressor(id); + if (!decompressor->supported) { + ERROR("Filesystem uses \"%s\" compression. This is not " + "supported\n", decompressor->name); + return NULL; + } - return 0; + return decompressor; } @@ -87,13 +94,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) } msblk = sb->s_fs_info; - msblk->stream.workspace = kmalloc(zlib_inflate_workspacesize(), - GFP_KERNEL); - if (msblk->stream.workspace == NULL) { - ERROR("Failed to allocate zlib workspace\n"); - goto failure; - } - sblk = kzalloc(sizeof(*sblk), GFP_KERNEL); if (sblk == NULL) { ERROR("Failed to allocate squashfs_super_block\n"); @@ -120,25 +120,25 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } + err = -EINVAL; + /* Check it is a SQUASHFS superblock */ sb->s_magic = le32_to_cpu(sblk->s_magic); if (sb->s_magic != SQUASHFS_MAGIC) { if (!silent) ERROR("Can't find a SQUASHFS superblock on %s\n", bdevname(sb->s_bdev, b)); - err = -EINVAL; goto failed_mount; } - /* Check the MAJOR & MINOR versions and compression type */ - err = supported_squashfs_filesystem(le16_to_cpu(sblk->s_major), + /* Check the MAJOR & MINOR versions and lookup compression type */ + msblk->decompressor = supported_squashfs_filesystem( + le16_to_cpu(sblk->s_major), le16_to_cpu(sblk->s_minor), le16_to_cpu(sblk->compression)); - if (err < 0) + if (msblk->decompressor == NULL) goto failed_mount; - err = -EINVAL; - /* * Check if there's xattrs in the filesystem. These are not * supported in this version, so warn that they will be ignored. @@ -205,6 +205,10 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; + msblk->stream = squashfs_decompressor_init(msblk); + if (msblk->stream == NULL) + goto failed_mount; + msblk->block_cache = squashfs_cache_init("metadata", SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE); if (msblk->block_cache == NULL) @@ -292,17 +296,16 @@ failed_mount: squashfs_cache_delete(msblk->block_cache); squashfs_cache_delete(msblk->fragment_cache); squashfs_cache_delete(msblk->read_page); + squashfs_decompressor_free(msblk, msblk->stream); kfree(msblk->inode_lookup_table); kfree(msblk->fragment_index); kfree(msblk->id_table); - kfree(msblk->stream.workspace); kfree(sb->s_fs_info); sb->s_fs_info = NULL; kfree(sblk); return err; failure: - kfree(msblk->stream.workspace); kfree(sb->s_fs_info); sb->s_fs_info = NULL; return -ENOMEM; @@ -346,10 +349,10 @@ static void squashfs_put_super(struct super_block *sb) squashfs_cache_delete(sbi->block_cache); squashfs_cache_delete(sbi->fragment_cache); squashfs_cache_delete(sbi->read_page); + squashfs_decompressor_free(sbi, sbi->stream); kfree(sbi->id_table); kfree(sbi->fragment_index); kfree(sbi->meta_index); - kfree(sbi->stream.workspace); kfree(sb->s_fs_info); sb->s_fs_info = NULL; } diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c index 83d87880aac..e80be2022a7 100644 --- a/fs/squashfs/symlink.c +++ b/fs/squashfs/symlink.c @@ -36,7 +36,6 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/pagemap.h> -#include <linux/zlib.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c new file mode 100644 index 00000000000..4dd70e04333 --- /dev/null +++ b/fs/squashfs/zlib_wrapper.c @@ -0,0 +1,150 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * zlib_wrapper.c + */ + + +#include <linux/mutex.h> +#include <linux/buffer_head.h> +#include <linux/zlib.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" +#include "decompressor.h" + +static void *zlib_init(struct squashfs_sb_info *dummy) +{ + z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); + if (stream == NULL) + goto failed; + stream->workspace = kmalloc(zlib_inflate_workspacesize(), + GFP_KERNEL); + if (stream->workspace == NULL) + goto failed; + + return stream; + +failed: + ERROR("Failed to allocate zlib workspace\n"); + kfree(stream); + return NULL; +} + + +static void zlib_free(void *strm) +{ + z_stream *stream = strm; + + if (stream) + kfree(stream->workspace); + kfree(stream); +} + + +static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, + struct buffer_head **bh, int b, int offset, int length, int srclength, + int pages) +{ + int zlib_err = 0, zlib_init = 0; + int avail, bytes, k = 0, page = 0; + z_stream *stream = msblk->stream; + + mutex_lock(&msblk->read_data_mutex); + + stream->avail_out = 0; + stream->avail_in = 0; + + bytes = length; + do { + if (stream->avail_in == 0 && k < b) { + avail = min(bytes, msblk->devblksize - offset); + bytes -= avail; + wait_on_buffer(bh[k]); + if (!buffer_uptodate(bh[k])) + goto release_mutex; + + if (avail == 0) { + offset = 0; + put_bh(bh[k++]); + continue; + } + + stream->next_in = bh[k]->b_data + offset; + stream->avail_in = avail; + offset = 0; + } + + if (stream->avail_out == 0 && page < pages) { + stream->next_out = buffer[page++]; + stream->avail_out = PAGE_CACHE_SIZE; + } + + if (!zlib_init) { + zlib_err = zlib_inflateInit(stream); + if (zlib_err != Z_OK) { + ERROR("zlib_inflateInit returned unexpected " + "result 0x%x, srclength %d\n", + zlib_err, srclength); + goto release_mutex; + } + zlib_init = 1; + } + + zlib_err = zlib_inflate(stream, Z_SYNC_FLUSH); + + if (stream->avail_in == 0 && k < b) + put_bh(bh[k++]); + } while (zlib_err == Z_OK); + + if (zlib_err != Z_STREAM_END) { + ERROR("zlib_inflate error, data probably corrupt\n"); + goto release_mutex; + } + + zlib_err = zlib_inflateEnd(stream); + if (zlib_err != Z_OK) { + ERROR("zlib_inflate error, data probably corrupt\n"); + goto release_mutex; + } + + mutex_unlock(&msblk->read_data_mutex); + return stream->total_out; + +release_mutex: + mutex_unlock(&msblk->read_data_mutex); + + for (; k < b; k++) + put_bh(bh[k]); + + return -EIO; +} + +const struct squashfs_decompressor squashfs_zlib_comp_ops = { + .init = zlib_init, + .free = zlib_free, + .decompress = zlib_uncompress, + .id = ZLIB_COMPRESSION, + .name = "zlib", + .supported = 1 +}; + diff --git a/fs/sync.c b/fs/sync.c index 418727a2a23..f557d71cb09 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -34,14 +34,14 @@ static int __sync_filesystem(struct super_block *sb, int wait) if (!sb->s_bdi) return 0; - /* Avoid doing twice syncing and cache pruning for quota sync */ - if (!wait) { - writeout_quota_sb(sb, -1); - writeback_inodes_sb(sb); - } else { - sync_quota_sb(sb, -1); + if (sb->s_qcop && sb->s_qcop->quota_sync) + sb->s_qcop->quota_sync(sb, -1, wait); + + if (wait) sync_inodes_sb(sb); - } + else + writeback_inodes_sb(sb); + if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, wait); return __sync_blockdev(sb->s_bdev, wait); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 9824743832a..4573734d723 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -26,6 +26,7 @@ #include <linux/init.h> #include <linux/buffer_head.h> #include <linux/vfs.h> +#include <linux/writeback.h> #include <linux/namei.h> #include <asm/byteorder.h> #include "sysv.h" @@ -246,7 +247,7 @@ bad_inode: return ERR_PTR(-EIO); } -int sysv_write_inode(struct inode *inode, int wait) +static int __sysv_write_inode(struct inode *inode, int wait) { struct super_block * sb = inode->i_sb; struct sysv_sb_info * sbi = SYSV_SB(sb); @@ -296,9 +297,14 @@ int sysv_write_inode(struct inode *inode, int wait) return 0; } +int sysv_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __sysv_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int sysv_sync_inode(struct inode *inode) { - return sysv_write_inode(inode, 1); + return __sysv_write_inode(inode, 1); } static void sysv_delete_inode(struct inode *inode) diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 53786eb5cf6..94cb9b4d76c 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -142,7 +142,7 @@ extern int __sysv_write_begin(struct file *file, struct address_space *mapping, /* inode.c */ extern struct inode *sysv_iget(struct super_block *, unsigned int); -extern int sysv_write_inode(struct inode *, int); +extern int sysv_write_inode(struct inode *, struct writeback_control *wbc); extern int sysv_sync_inode(struct inode *); extern void sysv_set_inode(struct inode *, dev_t); extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 552fb0111ff..401e503d44a 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1120,7 +1120,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, if (release) ubifs_release_budget(c, &ino_req); if (IS_SYNC(old_inode)) - err = old_inode->i_sb->s_op->write_inode(old_inode, 1); + err = old_inode->i_sb->s_op->write_inode(old_inode, NULL); return err; out_cancel: diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 16a6444330e..e26c02ab6cd 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1011,7 +1011,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) /* Is the page fully inside @i_size? */ if (page->index < end_index) { if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_unlock; /* @@ -1039,7 +1039,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) kunmap_atomic(kaddr, KM_USER0); if (i_size > synced_i_size) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_unlock; } @@ -1242,7 +1242,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, if (release) ubifs_release_budget(c, &req); if (IS_SYNC(inode)) - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); return err; out: @@ -1316,7 +1316,7 @@ int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) * the inode unless this is a 'datasync()' call. */ if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) return err; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 43f9d19a6f3..4d2f2157dd3 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -283,7 +283,7 @@ static void ubifs_destroy_inode(struct inode *inode) /* * Note, Linux write-back code calls this without 'i_mutex'. */ -static int ubifs_write_inode(struct inode *inode, int wait) +static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc) { int err = 0; struct ubifs_info *c = inode->i_sb->s_fs_info; diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index b2d96f45c12..ccc3ad7242d 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -208,7 +208,7 @@ static void udf_bitmap_free_blocks(struct super_block *sb, ((char *)bh->b_data)[(bit + i) >> 3]); } else { if (inode) - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); udf_add_free_space(sb, sbi->s_partition, 1); } } @@ -260,11 +260,11 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb, while (bit < (sb->s_blocksize << 3) && block_count > 0) { if (!udf_test_bit(bit, bh->b_data)) goto out; - else if (vfs_dq_prealloc_block(inode, 1)) + else if (dquot_prealloc_block(inode, 1)) goto out; else if (!udf_clear_bit(bit, bh->b_data)) { udf_debug("bit already cleared for block %d\n", bit); - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); goto out; } block_count--; @@ -390,10 +390,14 @@ got_block: /* * Check quota for allocation of this block. */ - if (inode && vfs_dq_alloc_block(inode, 1)) { - mutex_unlock(&sbi->s_alloc_mutex); - *err = -EDQUOT; - return 0; + if (inode) { + int ret = dquot_alloc_block(inode, 1); + + if (ret) { + mutex_unlock(&sbi->s_alloc_mutex); + *err = ret; + return 0; + } } newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) - @@ -449,7 +453,7 @@ static void udf_table_free_blocks(struct super_block *sb, /* We do this up front - There are some error conditions that could occure, but.. oh well */ if (inode) - vfs_dq_free_block(inode, count); + dquot_free_block(inode, count); udf_add_free_space(sb, sbi->s_partition, count); start = bloc->logicalBlockNum + offset; @@ -694,7 +698,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, epos.offset -= adsize; alloc_count = (elen >> sb->s_blocksize_bits); - if (inode && vfs_dq_prealloc_block(inode, + if (inode && dquot_prealloc_block(inode, alloc_count > block_count ? block_count : alloc_count)) alloc_count = 0; else if (alloc_count > block_count) { @@ -797,12 +801,13 @@ static int udf_table_new_block(struct super_block *sb, newblock = goal_eloc.logicalBlockNum; goal_eloc.logicalBlockNum++; goal_elen -= sb->s_blocksize; - - if (inode && vfs_dq_alloc_block(inode, 1)) { - brelse(goal_epos.bh); - mutex_unlock(&sbi->s_alloc_mutex); - *err = -EDQUOT; - return 0; + if (inode) { + *err = dquot_alloc_block(inode, 1); + if (*err) { + brelse(goal_epos.bh); + mutex_unlock(&sbi->s_alloc_mutex); + return 0; + } } if (goal_elen) diff --git a/fs/udf/file.c b/fs/udf/file.c index f311d509b6a..1eb06774ed9 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -34,6 +34,7 @@ #include <linux/errno.h> #include <linux/smp_lock.h> #include <linux/pagemap.h> +#include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/aio.h> @@ -207,7 +208,7 @@ const struct file_operations udf_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .ioctl = udf_ioctl, - .open = generic_file_open, + .open = dquot_file_open, .mmap = generic_file_mmap, .write = do_sync_write, .aio_write = udf_file_aio_write, @@ -217,6 +218,29 @@ const struct file_operations udf_file_operations = { .llseek = generic_file_llseek, }; +static int udf_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct inode *inode = dentry->d_inode; + int error; + + error = inode_change_ok(inode, iattr); + if (error) + return error; + + if (iattr->ia_valid & ATTR_SIZE) + dquot_initialize(inode); + + if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || + (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { + error = dquot_transfer(inode, iattr); + if (error) + return error; + } + + return inode_setattr(inode, iattr); +} + const struct inode_operations udf_file_inode_operations = { - .truncate = udf_truncate, + .truncate = udf_truncate, + .setattr = udf_setattr, }; diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index c10fa39f97e..fb68c9cd0c3 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -36,8 +36,8 @@ void udf_free_inode(struct inode *inode) * Note: we must free any quota before locking the superblock, * as writing the quota to disk may need the lock as well. */ - vfs_dq_free_inode(inode); - vfs_dq_drop(inode); + dquot_free_inode(inode); + dquot_drop(inode); clear_inode(inode); @@ -61,7 +61,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) struct super_block *sb = dir->i_sb; struct udf_sb_info *sbi = UDF_SB(sb); struct inode *inode; - int block; + int block, ret; uint32_t start = UDF_I(dir)->i_location.logicalBlockNum; struct udf_inode_info *iinfo; struct udf_inode_info *dinfo = UDF_I(dir); @@ -153,12 +153,14 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) insert_inode_hash(inode); mark_inode_dirty(inode); - if (vfs_dq_alloc_inode(inode)) { - vfs_dq_drop(inode); + dquot_initialize(inode); + ret = dquot_alloc_inode(inode); + if (ret) { + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); - *err = -EDQUOT; + *err = ret; return NULL; } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 378a7592257..b57ab0402d8 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -36,6 +36,7 @@ #include <linux/pagemap.h> #include <linux/buffer_head.h> #include <linux/writeback.h> +#include <linux/quotaops.h> #include <linux/slab.h> #include <linux/crc-itu-t.h> @@ -70,6 +71,9 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); void udf_delete_inode(struct inode *inode) { + if (!is_bad_inode(inode)) + dquot_initialize(inode); + truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) @@ -108,6 +112,8 @@ void udf_clear_inode(struct inode *inode) (unsigned long long)inode->i_size, (unsigned long long)iinfo->i_lenExtents); } + + dquot_drop(inode); kfree(iinfo->i_ext.i_data); iinfo->i_ext.i_data = NULL; } @@ -1373,12 +1379,12 @@ static mode_t udf_convert_permissions(struct fileEntry *fe) return mode; } -int udf_write_inode(struct inode *inode, int sync) +int udf_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; lock_kernel(); - ret = udf_update_inode(inode, sync); + ret = udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); unlock_kernel(); return ret; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 7c56ff00cd5..db423ab078b 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -563,6 +563,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, int err; struct udf_inode_info *iinfo; + dquot_initialize(dir); + lock_kernel(); inode = udf_new_inode(dir, mode, &err); if (!inode) { @@ -616,6 +618,8 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, if (!old_valid_dev(rdev)) return -EINVAL; + dquot_initialize(dir); + lock_kernel(); err = -EIO; inode = udf_new_inode(dir, mode, &err); @@ -662,6 +666,8 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct udf_inode_info *dinfo = UDF_I(dir); struct udf_inode_info *iinfo; + dquot_initialize(dir); + lock_kernel(); err = -EMLINK; if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) @@ -799,6 +805,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) struct fileIdentDesc *fi, cfi; struct kernel_lb_addr tloc; + dquot_initialize(dir); + retval = -ENOENT; lock_kernel(); fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); @@ -845,6 +853,8 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry) struct fileIdentDesc cfi; struct kernel_lb_addr tloc; + dquot_initialize(dir); + retval = -ENOENT; lock_kernel(); fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); @@ -899,6 +909,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, struct buffer_head *bh; struct udf_inode_info *iinfo; + dquot_initialize(dir); + lock_kernel(); inode = udf_new_inode(dir, S_IFLNK, &err); if (!inode) @@ -1069,6 +1081,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, int err; struct buffer_head *bh; + dquot_initialize(dir); + lock_kernel(); if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { unlock_kernel(); @@ -1131,6 +1145,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, struct kernel_lb_addr tloc; struct udf_inode_info *old_iinfo = UDF_I(old_inode); + dquot_initialize(old_dir); + dquot_initialize(new_dir); + lock_kernel(); ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); if (ofi) { diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 8d46f4294ee..4223ac855da 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -142,7 +142,7 @@ extern void udf_truncate(struct inode *); extern void udf_read_inode(struct inode *); extern void udf_delete_inode(struct inode *); extern void udf_clear_inode(struct inode *); -extern int udf_write_inode(struct inode *, int); +extern int udf_write_inode(struct inode *, struct writeback_control *wbc); extern long udf_block_map(struct inode *, sector_t); extern int udf_extend_file(struct inode *, struct extent_position *, struct kernel_long_ad *, sector_t); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 54c16ec95df..5cfa4d85ccf 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -85,7 +85,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) "bit already cleared for fragment %u", i); } - vfs_dq_free_block(inode, count); + dquot_free_block(inode, count); fs32_add(sb, &ucg->cg_cs.cs_nffree, count); @@ -195,7 +195,7 @@ do_more: ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) ufs_clusteracct (sb, ucpi, blkno, 1); - vfs_dq_free_block(inode, uspi->s_fpb); + dquot_free_block(inode, uspi->s_fpb); fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1); uspi->cs_total.cs_nbfree++; @@ -511,6 +511,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, struct ufs_cg_private_info * ucpi; struct ufs_cylinder_group * ucg; unsigned cgno, fragno, fragoff, count, fragsize, i; + int ret; UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n", (unsigned long long)fragment, oldcount, newcount); @@ -556,8 +557,9 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1); for (i = oldcount; i < newcount; i++) ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i); - if (vfs_dq_alloc_block(inode, count)) { - *err = -EDQUOT; + ret = dquot_alloc_block(inode, count); + if (ret) { + *err = ret; return 0; } @@ -596,6 +598,7 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno, struct ufs_cylinder_group * ucg; unsigned oldcg, i, j, k, allocsize; u64 result; + int ret; UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n", inode->i_ino, cgno, (unsigned long long)goal, count); @@ -664,7 +667,7 @@ cg_found: for (i = count; i < uspi->s_fpb; i++) ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i); i = uspi->s_fpb - count; - vfs_dq_free_block(inode, i); + dquot_free_block(inode, i); fs32_add(sb, &ucg->cg_cs.cs_nffree, i); uspi->cs_total.cs_nffree += i; @@ -676,8 +679,9 @@ cg_found: result = ufs_bitmap_search (sb, ucpi, goal, allocsize); if (result == INVBLOCK) return 0; - if (vfs_dq_alloc_block(inode, count)) { - *err = -EDQUOT; + ret = dquot_alloc_block(inode, count); + if (ret) { + *err = ret; return 0; } for (i = 0; i < count; i++) @@ -714,6 +718,7 @@ static u64 ufs_alloccg_block(struct inode *inode, struct ufs_super_block_first * usb1; struct ufs_cylinder_group * ucg; u64 result, blkno; + int ret; UFSD("ENTER, goal %llu\n", (unsigned long long)goal); @@ -747,8 +752,9 @@ gotit: ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) ufs_clusteracct (sb, ucpi, blkno, -1); - if (vfs_dq_alloc_block(inode, uspi->s_fpb)) { - *err = -EDQUOT; + ret = dquot_alloc_block(inode, uspi->s_fpb); + if (ret) { + *err = ret; return INVBLOCK; } diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 73655c61240..a8962cecde5 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -24,6 +24,7 @@ */ #include <linux/fs.h> +#include <linux/quotaops.h> #include "ufs_fs.h" #include "ufs.h" @@ -40,7 +41,7 @@ const struct file_operations ufs_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .open = generic_file_open, + .open = dquot_file_open, .fsync = simple_fsync, .splice_read = generic_file_splice_read, }; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 3527c00fef0..230ecf60802 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -95,8 +95,8 @@ void ufs_free_inode (struct inode * inode) is_directory = S_ISDIR(inode->i_mode); - vfs_dq_free_inode(inode); - vfs_dq_drop(inode); + dquot_free_inode(inode); + dquot_drop(inode); clear_inode (inode); @@ -355,9 +355,10 @@ cg_found: unlock_super (sb); - if (vfs_dq_alloc_inode(inode)) { - vfs_dq_drop(inode); - err = -EDQUOT; + dquot_initialize(inode); + err = dquot_alloc_inode(inode); + if (err) { + dquot_drop(inode); goto fail_without_unlock; } diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7cf33379fd4..80b68c3702d 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -36,6 +36,8 @@ #include <linux/mm.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> +#include <linux/writeback.h> +#include <linux/quotaops.h> #include "ufs_fs.h" #include "ufs.h" @@ -890,11 +892,11 @@ static int ufs_update_inode(struct inode * inode, int do_sync) return 0; } -int ufs_write_inode (struct inode * inode, int wait) +int ufs_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; lock_kernel(); - ret = ufs_update_inode (inode, wait); + ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); unlock_kernel(); return ret; } @@ -908,6 +910,9 @@ void ufs_delete_inode (struct inode * inode) { loff_t old_i_size; + if (!is_bad_inode(inode)) + dquot_initialize(inode); + truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) goto no_delete; diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 4c26d9e8bc9..118556243e7 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -30,6 +30,7 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/smp_lock.h> +#include <linux/quotaops.h> #include "ufs_fs.h" #include "ufs.h" @@ -84,6 +85,9 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode, int err; UFSD("BEGIN\n"); + + dquot_initialize(dir); + inode = ufs_new_inode(dir, mode); err = PTR_ERR(inode); @@ -107,6 +111,9 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t if (!old_valid_dev(rdev)) return -EINVAL; + + dquot_initialize(dir); + inode = ufs_new_inode(dir, mode); err = PTR_ERR(inode); if (!IS_ERR(inode)) { @@ -131,6 +138,8 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out_notlocked; + dquot_initialize(dir); + lock_kernel(); inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); @@ -176,6 +185,8 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, return -EMLINK; } + dquot_initialize(dir); + inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); atomic_inc(&inode->i_count); @@ -193,6 +204,8 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (dir->i_nlink >= UFS_LINK_MAX) goto out; + dquot_initialize(dir); + lock_kernel(); inode_inc_link_count(dir); @@ -237,6 +250,8 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry) struct page *page; int err = -ENOENT; + dquot_initialize(dir); + de = ufs_find_entry(dir, &dentry->d_name, &page); if (!de) goto out; @@ -281,6 +296,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ufs_dir_entry *old_de; int err = -ENOENT; + dquot_initialize(old_dir); + dquot_initialize(new_dir); + old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_de) goto out; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 143c20bfb04..66b63a75161 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1432,6 +1432,11 @@ static void destroy_inodecache(void) kmem_cache_destroy(ufs_inode_cachep); } +static void ufs_clear_inode(struct inode *inode) +{ + dquot_drop(inode); +} + #ifdef CONFIG_QUOTA static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t); static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t); @@ -1442,6 +1447,7 @@ static const struct super_operations ufs_super_ops = { .destroy_inode = ufs_destroy_inode, .write_inode = ufs_write_inode, .delete_inode = ufs_delete_inode, + .clear_inode = ufs_clear_inode, .put_super = ufs_put_super, .write_super = ufs_write_super, .sync_fs = ufs_sync_fs, diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 41dd431ce22..d3b6270cb37 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -44,6 +44,7 @@ #include <linux/buffer_head.h> #include <linux/blkdev.h> #include <linux/sched.h> +#include <linux/quotaops.h> #include "ufs_fs.h" #include "ufs.h" @@ -517,9 +518,18 @@ static int ufs_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + error = dquot_transfer(inode, attr); + if (error) + return error; + } if (ia_valid & ATTR_SIZE && attr->ia_size != i_size_read(inode)) { loff_t old_i_size = inode->i_size; + + dquot_initialize(inode); + error = vmtruncate(inode, attr->ia_size); if (error) return error; diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 01d0e2a3b23..43f9f5d5670 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -106,7 +106,7 @@ extern struct inode * ufs_new_inode (struct inode *, int); /* inode.c */ extern struct inode *ufs_iget(struct super_block *, unsigned long); -extern int ufs_write_inode (struct inode *, int); +extern int ufs_write_inode (struct inode *, struct writeback_control *); extern int ufs_sync_inode (struct inode *); extern void ufs_delete_inode (struct inode *); extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 3d4a0c84d63..1947514ce1a 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -44,20 +44,6 @@ xfs_quota_type(int type) } STATIC int -xfs_fs_quota_sync( - struct super_block *sb, - int type) -{ - struct xfs_mount *mp = XFS_M(sb); - - if (sb->s_flags & MS_RDONLY) - return -EROFS; - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; - return -xfs_sync_data(mp, 0); -} - -STATIC int xfs_fs_get_xstate( struct super_block *sb, struct fs_quota_stat *fqs) @@ -82,8 +68,6 @@ xfs_fs_set_xstate( return -EROFS; if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; if (uflags & XFS_QUOTA_UDQ_ACCT) flags |= XFS_UQUOTA_ACCT; @@ -144,14 +128,11 @@ xfs_fs_set_xquota( return -ENOSYS; if (!XFS_IS_QUOTA_ON(mp)) return -ESRCH; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); } const struct quotactl_ops xfs_quotactl_operations = { - .quota_sync = xfs_fs_quota_sync, .get_xstate = xfs_fs_get_xstate, .set_xstate = xfs_fs_set_xstate, .get_xquota = xfs_fs_get_xquota, diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 25ea2408118..71345a370d9 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1063,7 +1063,7 @@ xfs_log_inode( STATIC int xfs_fs_write_inode( struct inode *inode, - int sync) + struct writeback_control *wbc) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; @@ -1074,11 +1074,7 @@ xfs_fs_write_inode( if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - if (sync) { - error = xfs_wait_on_pages(ip, 0, -1); - if (error) - goto out; - + if (wbc->sync_mode == WB_SYNC_ALL) { /* * Make sure the inode has hit stable storage. By using the * log and the fsync transactions we reduce the IOs we have |