From 005a59ec745d23f60222f7712adde48f64d7d3c8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Apr 2009 01:27:08 -0400 Subject: Deal with missing exports for hostfs Signed-off-by: Al Viro --- arch/um/include/shared/os.h | 3 +++ arch/um/kernel/ksyms.c | 3 +++ arch/um/os-Linux/file.c | 15 +++++++++++++++ arch/um/os-Linux/user_syms.c | 4 ++++ fs/hostfs/hostfs_user.c | 6 +++--- 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index cd40fddcf99..c4617baaa4f 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -161,6 +161,9 @@ extern int os_stat_filesystem(char *path, long *bsize_out, long *spare_out); extern int os_change_dir(char *dir); extern int os_fchange_dir(int fd); +extern unsigned os_major(unsigned long long dev); +extern unsigned os_minor(unsigned long long dev); +extern unsigned long long os_makedev(unsigned major, unsigned minor); /* start_up.c */ extern void os_early_checks(void); diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index 836fc9b9470..0ae0dfcfbff 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -58,6 +58,9 @@ EXPORT_SYMBOL(os_accept_connection); EXPORT_SYMBOL(os_rcv_fd); EXPORT_SYMBOL(run_helper); EXPORT_SYMBOL(start_thread); +EXPORT_SYMBOL(os_major); +EXPORT_SYMBOL(os_minor); +EXPORT_SYMBOL(os_makedev); EXPORT_SYMBOL(add_sigio_fd); EXPORT_SYMBOL(ignore_sigio_fd); diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index b5afcfd0f86..140e587bc0a 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -561,3 +561,18 @@ int os_lock_file(int fd, int excl) out: return err; } + +unsigned os_major(unsigned long long dev) +{ + return major(dev); +} + +unsigned os_minor(unsigned long long dev) +{ + return minor(dev); +} + +unsigned long long os_makedev(unsigned major, unsigned minor) +{ + return makedev(major, minor); +} diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c index 
89b48a116a8..05f5ea8e83d 100644 --- a/arch/um/os-Linux/user_syms.c +++ b/arch/um/os-Linux/user_syms.c @@ -103,6 +103,10 @@ EXPORT_SYMBOL_PROTO(getuid); EXPORT_SYMBOL_PROTO(fsync); EXPORT_SYMBOL_PROTO(fdatasync); +EXPORT_SYMBOL_PROTO(lstat64); +EXPORT_SYMBOL_PROTO(fstat64); +EXPORT_SYMBOL_PROTO(mknod); + /* Export symbols used by GCC for the stack protector. */ extern void __stack_smash_handler(void *) __attribute__((weak)); EXPORT_SYMBOL(__stack_smash_handler); diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index b79424f9328..4b8c666ba28 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -76,9 +76,9 @@ int file_type(const char *path, int *maj, int *min) * about its definition. */ if (maj != NULL) - *maj = major(buf.st_rdev); + *maj = os_major(buf.st_rdev); if (min != NULL) - *min = minor(buf.st_rdev); + *min = os_minor(buf.st_rdev); if (S_ISDIR(buf.st_mode)) return OS_TYPE_DIR; @@ -361,7 +361,7 @@ int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor) { int err; - err = mknod(file, mode, makedev(major, minor)); + err = mknod(file, mode, os_makedev(major, minor)); if (err) return -errno; return 0; -- cgit v1.2.3-70-g09d2 From 918377b696bff7384923a1ef4bf0af7626cb9b68 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 23:56:02 -0400 Subject: missing include in hppfs Signed-off-by: Al Viro --- fs/hppfs/hppfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 826c3f9d29a..943ce751ce1 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "os.h" -- cgit v1.2.3-70-g09d2 From 0e4f6a791b1e8cfde75a74e2f885642ecb3fe9d8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 Jul 2010 12:18:57 +0400 Subject: Fix reiserfs_file_release() a) count file openers correctly; i_count use was completely wrong b) use new mutex for exclusion between final close/open/truncate, to protect tailpacking logics. 
i_mutex use was wrong and resulted in deadlocks. Signed-off-by: Al Viro --- fs/reiserfs/file.c | 50 ++++++++++++++++++++++++------------------- fs/reiserfs/inode.c | 2 -- fs/reiserfs/super.c | 2 ++ include/linux/reiserfs_fs_i.h | 4 ++-- 4 files changed, 32 insertions(+), 26 deletions(-) diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index b82cdd8a45d..6846371498b 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -38,20 +38,24 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) BUG_ON(!S_ISREG(inode->i_mode)); + if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1)) + return 0; + + mutex_lock(&(REISERFS_I(inode)->tailpack)); + + if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) { + mutex_unlock(&(REISERFS_I(inode)->tailpack)); + return 0; + } + /* fast out for when nothing needs to be done */ - if ((atomic_read(&inode->i_count) > 1 || - !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || + if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || !tail_has_to_be_packed(inode)) && REISERFS_I(inode)->i_prealloc_count <= 0) { + mutex_unlock(&(REISERFS_I(inode)->tailpack)); return 0; } - mutex_lock(&inode->i_mutex); - - mutex_lock(&(REISERFS_I(inode)->i_mmap)); - if (REISERFS_I(inode)->i_flags & i_ever_mapped) - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; - reiserfs_write_lock(inode->i_sb); /* freeing preallocation only involves relogging blocks that * are already in the current transaction. 
preallocation gets @@ -94,9 +98,10 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) if (!err) err = jbegin_failure; - if (!err && atomic_read(&inode->i_count) <= 1 && + if (!err && (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && tail_has_to_be_packed(inode)) { + /* if regular file is released by last holder and it has been appended (we append by unformatted node only) or its direct item(s) had to be converted, then it may have to be @@ -104,27 +109,28 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) err = reiserfs_truncate_file(inode, 0); } out: - mutex_unlock(&(REISERFS_I(inode)->i_mmap)); - mutex_unlock(&inode->i_mutex); reiserfs_write_unlock(inode->i_sb); + mutex_unlock(&(REISERFS_I(inode)->tailpack)); return err; } -static int reiserfs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int reiserfs_file_open(struct inode *inode, struct file *file) { - struct inode *inode; - - inode = file->f_path.dentry->d_inode; - mutex_lock(&(REISERFS_I(inode)->i_mmap)); - REISERFS_I(inode)->i_flags |= i_ever_mapped; - mutex_unlock(&(REISERFS_I(inode)->i_mmap)); - - return generic_file_mmap(file, vma); + int err = dquot_file_open(inode, file); + if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) { + /* somebody might be tailpacking on final close; wait for it */ + mutex_lock(&(REISERFS_I(inode)->tailpack)); + atomic_inc(&REISERFS_I(inode)->openers); + mutex_unlock(&(REISERFS_I(inode)->tailpack)); + } + return err; } static void reiserfs_vfs_truncate_file(struct inode *inode) { + mutex_lock(&(REISERFS_I(inode)->tailpack)); reiserfs_truncate_file(inode, 1); + mutex_unlock(&(REISERFS_I(inode)->tailpack)); } /* Sync a reiserfs file. 
*/ @@ -288,8 +294,8 @@ const struct file_operations reiserfs_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = reiserfs_compat_ioctl, #endif - .mmap = reiserfs_file_mmap, - .open = dquot_file_open, + .mmap = generic_file_mmap, + .open = reiserfs_file_open, .release = reiserfs_file_release, .fsync = reiserfs_sync_file, .aio_read = generic_file_aio_read, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 0f22fdaf54a..6edac85c2b9 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1138,7 +1138,6 @@ static void init_inode(struct inode *inode, struct treepath *path) REISERFS_I(inode)->i_prealloc_count = 0; REISERFS_I(inode)->i_trans_id = 0; REISERFS_I(inode)->i_jl = NULL; - mutex_init(&(REISERFS_I(inode)->i_mmap)); reiserfs_init_xattr_rwsem(inode); if (stat_data_v1(ih)) { @@ -1841,7 +1840,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, REISERFS_I(inode)->i_attrs = REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); - mutex_init(&(REISERFS_I(inode)->i_mmap)); reiserfs_init_xattr_rwsem(inode); /* key to search for correct place for new stat data */ diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 9822fa15118..1e1ee9056eb 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -525,6 +525,8 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb) kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; + atomic_set(&ei->openers, 0); + mutex_init(&ei->tailpack); return &ei->vfs_inode; } diff --git a/include/linux/reiserfs_fs_i.h b/include/linux/reiserfs_fs_i.h index 89f4d3abbf5..97959bdfe21 100644 --- a/include/linux/reiserfs_fs_i.h +++ b/include/linux/reiserfs_fs_i.h @@ -25,7 +25,6 @@ typedef enum { i_link_saved_truncate_mask = 0x0020, i_has_xattr_dir = 0x0040, i_data_log = 0x0080, - i_ever_mapped = 0x0100 } reiserfs_inode_flags; struct reiserfs_inode_info { @@ -53,7 +52,8 @@ struct reiserfs_inode_info { ** flushed */ 
unsigned int i_trans_id; struct reiserfs_journal_list *i_jl; - struct mutex i_mmap; + atomic_t openers; + struct mutex tailpack; #ifdef CONFIG_REISERFS_FS_XATTR struct rw_semaphore i_xattr_sem; #endif -- cgit v1.2.3-70-g09d2 From 256249584bda1a9357e2d29987a37f5b2df035f6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 Jul 2010 12:23:11 +0400 Subject: fix leak in __logfs_create() if kmalloc fails, we still need to drop the inode, as we do on other failure exits. Signed-off-by: Al Viro --- fs/logfs/dir.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 72d1893ddd3..675cc49197f 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -434,8 +434,11 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry, int ret; ta = kzalloc(sizeof(*ta), GFP_KERNEL); - if (!ta) + if (!ta) { + inode->i_nlink--; + iput(inode); return -ENOMEM; + } ta->state = CREATE_1; ta->ino = inode->i_ino; -- cgit v1.2.3-70-g09d2 From eafdc7d190a944c755a9fe68573c193e6e0217e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:29:53 +0200 Subject: sort out blockdev_direct_IO variants Move the call to vmtruncate to get rid of excessive blocks to the callers in preparation of the new truncate calling sequence. This was only done for DIO_LOCKING filesystems, so the __blockdev_direct_IO_newtrunc variant was not needed anyway. Get rid of blockdev_direct_IO_no_locking and its _newtrunc variant while at it as just opencoding the two additional parameters is shorter than the name suffix.
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/block_dev.c | 5 ++- fs/direct-io.c | 74 ++++++++++++--------------------------------- fs/ext2/inode.c | 2 +- fs/ext3/inode.c | 11 +++++++ fs/ext4/inode.c | 15 +++++++-- fs/fat/inode.c | 4 +-- fs/gfs2/aops.c | 6 ++-- fs/hfs/inode.c | 17 ++++++++++- fs/hfsplus/inode.c | 17 ++++++++++- fs/jfs/inode.c | 17 ++++++++++- fs/nilfs2/inode.c | 13 ++++++++ fs/ocfs2/aops.c | 9 +++--- fs/reiserfs/inode.c | 17 ++++++++++- fs/xfs/linux-2.6/xfs_aops.c | 16 +++++----- include/linux/fs.h | 42 ++++--------------------- 15 files changed, 146 insertions(+), 119 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 99d6af81174..65a0c26508e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -172,9 +172,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode, - I_BDEV(inode), iov, offset, nr_segs, - blkdev_get_blocks, NULL); + return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset, + nr_segs, blkdev_get_blocks, NULL, NULL, 0); } int __sync_blockdev(struct block_device *bdev, int wait) diff --git a/fs/direct-io.c b/fs/direct-io.c index a10cb91cade..51f270b479b 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1136,8 +1136,27 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, return ret; } +/* + * This is a library function for use by filesystem drivers. + * + * The locking rules are governed by the flags parameter: + * - if the flags value contains DIO_LOCKING we use a fancy locking + * scheme for dumb filesystems. + * For writes this function is called under i_mutex and returns with + * i_mutex held, for reads, i_mutex is not held on entry, but it is + * taken and dropped again before returning. 
+ * For reads and writes i_alloc_sem is taken in shared mode and released + * on I/O completion (which may happen asynchronously after returning to + * the caller). + * + * - if the flags value does NOT contain DIO_LOCKING we don't use any + * internal locking but rather rely on the filesystem to synchronize + * direct I/O reads/writes versus each other and truncate. + * For reads and writes both i_mutex and i_alloc_sem are not held on + * entry and are never taken. + */ ssize_t -__blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode, +__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags) @@ -1233,57 +1252,4 @@ __blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode, out: return retval; } -EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc); - -/* - * This is a library function for use by filesystem drivers. - * - * The locking rules are governed by the flags parameter: - * - if the flags value contains DIO_LOCKING we use a fancy locking - * scheme for dumb filesystems. - * For writes this function is called under i_mutex and returns with - * i_mutex held, for reads, i_mutex is not held on entry, but it is - * taken and dropped again before returning. - * For reads and writes i_alloc_sem is taken in shared mode and released - * on I/O completion (which may happen asynchronously after returning to - * the caller). - * - * - if the flags value does NOT contain DIO_LOCKING we don't use any - * internal locking but rather rely on the filesystem to synchronize - * direct I/O reads/writes versus each other and truncate. - * For reads and writes both i_mutex and i_alloc_sem are not held on - * entry and are never taken. 
- */ -ssize_t -__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags) -{ - ssize_t retval; - - retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov, - offset, nr_segs, get_block, end_io, submit_io, flags); - /* - * In case of error extending write may have instantiated a few - * blocks outside i_size. Trim these off again for DIO_LOCKING. - * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in - * their own manner. This is a further example of where the old - * truncate sequence is inadequate. - * - * NOTE: filesystems with their own locking have to handle this - * on their own. - */ - if (flags & DIO_LOCKING) { - if (unlikely((rw & WRITE) && retval < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); - - if (end > isize) - vmtruncate(inode, isize); - } - } - - return retval; -} EXPORT_SYMBOL(__blockdev_direct_IO); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 3675088cb88..f36e967e4fd 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -838,7 +838,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct inode *inode = mapping->host; ssize_t ret; - ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev, + ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ext2_get_block, NULL); if (ret < 0 && (rw & WRITE)) ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 735f0190ec2..a66f3fe3367 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1785,6 +1785,17 @@ retry: ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ext3_get_block, NULL); + /* + * In case of error extending write may have instantiated a few + * blocks outside i_size. 
Trim these off again. + */ + if (unlikely((rw & WRITE) && ret < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + iov_length(iov, nr_segs); + + if (end > isize) + vmtruncate(inode, isize); + } if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) goto retry; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0afc8c1d8cf..d6a7701018a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3545,15 +3545,24 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, retry: if (rw == READ && ext4_should_dioread_nolock(inode)) - ret = blockdev_direct_IO_no_locking(rw, iocb, inode, + ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, - ext4_get_block, NULL); - else + ext4_get_block, NULL, NULL, 0); + else { ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ext4_get_block, NULL); + + if (unlikely((rw & WRITE) && ret < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + iov_length(iov, nr_segs); + + if (end > isize) + vmtruncate(inode, isize); + } + } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 7bf45aee56d..ffe7c6fdc1e 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -212,8 +212,8 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, * FAT need to use the DIO_LOCKING for avoiding the race * condition of fat_get_block() and ->truncate(). 
*/ - ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev, - iov, offset, nr_segs, fat_get_block, NULL); + ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, + iov, offset, nr_segs, fat_get_block, NULL); if (ret < 0 && (rw & WRITE)) fat_write_failed(mapping, offset + iov_length(iov, nr_segs)); diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 9f8b52500d6..703000d6e4d 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1047,9 +1047,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, if (rv != 1) goto out; /* dio not valid, fall back to buffered i/o */ - rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, - iov, offset, nr_segs, - gfs2_get_block_direct, NULL); + rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, gfs2_get_block_direct, + NULL, NULL, 0); out: gfs2_glock_dq_m(1, &gh); gfs2_holder_uninit(&gh); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 14f5cb1b9fd..07b2464b571 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -112,9 +112,24 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; + ssize_t ret; - return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, hfs_get_block, NULL); + + /* + * In case of error extending write may have instantiated a few + * blocks outside i_size. Trim these off again. 
+ */ + if (unlikely((rw & WRITE) && ret < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + iov_length(iov, nr_segs); + + if (end > isize) + vmtruncate(inode, isize); + } + + return ret; } static int hfs_writepages(struct address_space *mapping, diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 9bbb82924a2..48602177391 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -105,9 +105,24 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; + ssize_t ret; - return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, hfsplus_get_block, NULL); + + /* + * In case of error extending write may have instantiated a few + * blocks outside i_size. Trim these off again. + */ + if (unlikely((rw & WRITE) && ret < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + iov_length(iov, nr_segs); + + if (end > isize) + vmtruncate(inode, isize); + } + + return ret; } static int hfsplus_writepages(struct address_space *mapping, diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index ed9ba6fe04f..79e6cda2818 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -317,9 +317,24 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + ssize_t ret; - return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, jfs_get_block, NULL); + + /* + * In case of error extending write may have instantiated a few + * blocks outside i_size. Trim these off again. 
+ */ + if (unlikely((rw & WRITE) && ret < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + iov_length(iov, nr_segs); + + if (end > isize) + vmtruncate(inode, isize); + } + + return ret; } const struct address_space_operations jfs_aops = { diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 39e038ac8fc..1dd9e6a7d78 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -237,6 +237,19 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, /* Needs synchronization with the cleaner */ size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, nilfs_get_block, NULL); + + /* + * In case of error extending write may have instantiated a few + * blocks outside i_size. Trim these off again. + */ + if (unlikely((rw & WRITE) && size < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + iov_length(iov, nr_segs); + + if (end > isize) + vmtruncate(inode, isize); + } + return size; } diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 96337a4fbbd..0de69c9a08b 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -643,11 +643,10 @@ static ssize_t ocfs2_direct_IO(int rw, if (i_size_read(inode) <= offset) return 0; - ret = blockdev_direct_IO_no_locking(rw, iocb, inode, - inode->i_sb->s_bdev, iov, offset, - nr_segs, - ocfs2_direct_IO_get_blocks, - ocfs2_dio_end_io); + ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, + iov, offset, nr_segs, + ocfs2_direct_IO_get_blocks, + ocfs2_dio_end_io, NULL, 0); mlog_exit(ret); return ret; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 6edac85c2b9..4c1fb548ab6 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3057,10 +3057,25 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + ssize_t ret; - return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, 
iov, offset, nr_segs, reiserfs_get_blocks_direct_io, NULL); + + /* + * In case of error extending write may have instantiated a few + * blocks outside i_size. Trim these off again. + */ + if (unlikely((rw & WRITE) && ret < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + iov_length(iov, nr_segs); + + if (end > isize) + vmtruncate(inode, isize); + } + + return ret; } int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index d24e78f32f3..7968d41e27a 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1478,17 +1478,17 @@ xfs_vm_direct_IO( if (rw & WRITE) { iocb->private = xfs_alloc_ioend(inode, IO_NEW); - ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, - offset, nr_segs, - xfs_get_blocks_direct, - xfs_end_io_direct_write); + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, + offset, nr_segs, + xfs_get_blocks_direct, + xfs_end_io_direct_write, NULL, 0); if (ret != -EIOCBQUEUED && iocb->private) xfs_destroy_ioend(iocb->private); } else { - ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, - offset, nr_segs, - xfs_get_blocks_direct, - NULL); + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, + offset, nr_segs, + xfs_get_blocks_direct, + NULL, NULL, 0); } return ret; diff --git a/include/linux/fs.h b/include/linux/fs.h index f91affb7d53..b347b2d5666 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2269,16 +2269,6 @@ static inline int xip_truncate_page(struct address_space *mapping, loff_t from) struct bio; typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, loff_t file_offset); -void dio_end_io(struct bio *bio, int error); - -ssize_t __blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, 
int lock_type); -ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int lock_type); enum { /* need locking between buffered and direct access */ @@ -2288,24 +2278,13 @@ enum { DIO_SKIP_HOLES = 0x02, }; -static inline ssize_t blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, - struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_block_t get_block, - dio_iodone_t end_io) -{ - return __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, NULL, - DIO_LOCKING | DIO_SKIP_HOLES); -} +void dio_end_io(struct bio *bio, int error); + +ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, const struct iovec *iov, loff_t offset, + unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, + dio_submit_t submit_io, int flags); -static inline ssize_t blockdev_direct_IO_no_locking_newtrunc(int rw, struct kiocb *iocb, - struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_block_t get_block, - dio_iodone_t end_io) -{ - return __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, NULL, 0); -} static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_block_t get_block, @@ -2315,15 +2294,6 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, nr_segs, get_block, end_io, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } - -static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, - struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, 
unsigned long nr_segs, get_block_t get_block, - dio_iodone_t end_io) -{ - return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, NULL, 0); -} #endif extern const struct file_operations generic_ro_fops; -- cgit v1.2.3-70-g09d2 From ea0f04e59543bafb3d2cbe37a0d375acb0bb2c34 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:29:54 +0200 Subject: get rid of nobh_write_begin_newtrunc Move the call to vmtruncate to get rid of excessive blocks to the only remaining caller and rename the non-truncating version to nobh_write_begin. Get rid of the superfluous file argument to it while we're at it. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 37 ++++--------------------------------- fs/ext2/inode.c | 9 ++------- fs/jfs/inode.c | 11 ++++++++++- include/linux/buffer_head.h | 6 +----- 4 files changed, 17 insertions(+), 46 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index d54812b198e..559daf76bca 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2510,11 +2510,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head) } /* - * Filesystems implementing the new truncate sequence should use the - * _newtrunc postfix variant which won't incorrectly call vmtruncate. + * On entry, the page is fully not uptodate. + * On exit the page is fully uptodate in the areas outside (from,to) * The filesystem needs to handle block truncation upon failure.
*/ -int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping, +int nobh_write_begin(struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata, get_block_t *get_block) @@ -2547,7 +2547,7 @@ int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping, unlock_page(page); page_cache_release(page); *pagep = NULL; - return block_write_begin_newtrunc(file, mapping, pos, len, + return block_write_begin_newtrunc(NULL, mapping, pos, len, flags, pagep, fsdata, get_block); } @@ -2654,35 +2654,6 @@ out_release: return ret; } -EXPORT_SYMBOL(nobh_write_begin_newtrunc); - -/* - * On entry, the page is fully not uptodate. - * On exit the page is fully uptodate in the areas outside (from,to) - */ -int nobh_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata, - get_block_t *get_block) -{ - int ret; - - ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, - pagep, fsdata, get_block); - - /* - * prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. - */ - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } - - return ret; -} EXPORT_SYMBOL(nobh_write_begin); int nobh_write_end(struct file *file, struct address_space *mapping, diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index f36e967e4fd..348805cd410 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -806,13 +806,8 @@ ext2_nobh_write_begin(struct file *file, struct address_space *mapping, { int ret; - /* - * Dir-in-pagecache still uses ext2_write_begin. Would have to rework - * directory handling code to pass around offsets rather than struct - * pages in order to make this work easily. 
- */ - ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, pagep, - fsdata, ext2_get_block); + ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata, + ext2_get_block); if (ret < 0) ext2_write_failed(mapping, pos + len); return ret; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 79e6cda2818..c38dc180628 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -303,8 +303,17 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + int ret; + + ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata, jfs_get_block); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t jfs_bmap(struct address_space *mapping, sector_t block) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 1b9ba193b78..cfda5f0b2a4 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -231,11 +231,7 @@ void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); int file_fsync(struct file *, int); -int nobh_write_begin_newtrunc(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page **, void **, get_block_t*); -int nobh_write_begin(struct file *, struct address_space *, - loff_t, unsigned, unsigned, +int nobh_write_begin(struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); int nobh_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, -- cgit v1.2.3-70-g09d2 From 282dc178849882289d30e58b54be6b2799b351aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:29:55 +0200 Subject: get rid of cont_write_begin_newtrunc 
Move the call to vmtruncate to get rid of excessive blocks to the callers in preparation of the new truncate sequence and rename the non-truncating version to cont_write_begin. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/adfs/inode.c | 11 ++++++++++- fs/affs/file.c | 11 ++++++++++- fs/buffer.c | 21 +-------------------- fs/fat/inode.c | 2 +- fs/hfs/inode.c | 11 ++++++++++- fs/hfsplus/inode.c | 11 ++++++++++- fs/hpfs/file.c | 11 ++++++++++- fs/qnx4/inode.c | 11 ++++++++++- include/linux/buffer_head.h | 3 --- 9 files changed, 62 insertions(+), 30 deletions(-) diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 6f850b06ab6..b3dec193036 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -50,10 +50,19 @@ static int adfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int ret; + *pagep = NULL; - return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, adfs_get_block, &ADFS_I(mapping->host)->mmu_private); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t _adfs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/affs/file.c b/fs/affs/file.c index 322710c3eed..c4a9875bd1a 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -406,10 +406,19 @@ static int affs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int ret; + *pagep = NULL; - return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, affs_get_block, &AFFS_I(mapping->host)->mmu_private); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } +
+ return ret; } static sector_t _affs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/buffer.c b/fs/buffer.c index 559daf76bca..14529ec759b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2351,7 +2351,7 @@ out: * For moronic filesystems that do not allow holes in file. * We may have to extend the file. */ -int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping, +int cont_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata, get_block_t *get_block, loff_t *bytes) @@ -2377,25 +2377,6 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping, out: return err; } -EXPORT_SYMBOL(cont_write_begin_newtrunc); - -int cont_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata, - get_block_t *get_block, loff_t *bytes) -{ - int ret; - - ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags, - pagep, fsdata, get_block, bytes); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } - - return ret; -} EXPORT_SYMBOL(cont_write_begin); int block_prepare_write(struct page *page, unsigned from, unsigned to, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index ffe7c6fdc1e..ec6a699a402 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -159,7 +159,7 @@ static int fat_write_begin(struct file *file, struct address_space *mapping, int err; *pagep = NULL; - err = cont_write_begin_newtrunc(file, mapping, pos, len, flags, + err = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, fat_get_block, &MSDOS_I(mapping->host)->mmu_private); if (err < 0) diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 07b2464b571..8df18e63eb6 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -39,10 +39,19 @@ static int hfs_write_begin(struct file *file, struct address_space *mapping, 
loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int ret; + *pagep = NULL; - return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, hfs_get_block, &HFS_I(mapping->host)->phys_size); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t hfs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 48602177391..88bf1b56264 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -31,10 +31,19 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int ret; + *pagep = NULL; - return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, hfsplus_get_block, &HFSPLUS_I(mapping->host).phys_size); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index a9ae9bfa752..c0340887c7e 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -97,10 +97,19 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int ret; + *pagep = NULL; - return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, hpfs_get_block, &hpfs_i(mapping->host)->mmu_private); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t 
_hpfs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 277575ddc05..16829722be9 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -320,10 +320,19 @@ static int qnx4_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host); + int ret; + *pagep = NULL; - return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, qnx4_get_block, &qnx4_inode->mmu_private); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) { diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index cfda5f0b2a4..7638647f042 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -217,9 +217,6 @@ int generic_write_end(struct file *, struct address_space *, struct page *, void *); void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); -int cont_write_begin_newtrunc(struct file *, struct address_space *, loff_t, - unsigned, unsigned, struct page **, void **, - get_block_t *, loff_t *); int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t *, loff_t *); -- cgit v1.2.3-70-g09d2 From f4e420dc423148fba637af1ab618fa8896dfb2d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:29:56 +0200 Subject: clean up write_begin usage for directories in pagecache For filesystem that implement directories in pagecache we call block_write_begin with an already allocated page for this code, while the normal regular file write path uses the default block_write_begin behaviour. 
Get rid of the __foofs_write_begin helper and opencode the normal write_begin call in foofs_write_begin, while adding a new foofs_prepare_chunk helper for the directory code. The added benefit is that foofs_prepare_chunk has a much saner calling convention. Note that the interruptible flag passed into block_write_begin is always ignored if we already pass in a page (see next patch for details), and we never were doing truncations of excessive blocks for this case either so we can switch directly to block_write_begin_newtrunc. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/ext2/dir.c | 24 ++++++++++++------------ fs/ext2/ext2.h | 3 --- fs/ext2/inode.c | 11 ++--------- fs/minix/dir.c | 21 +++++++-------------- fs/minix/inode.c | 11 +++++------ fs/minix/minix.h | 4 +--- fs/nilfs2/dir.c | 26 +++++++------------------- fs/sysv/dir.c | 21 +++++++-------------- fs/sysv/itree.c | 11 +++++------ fs/sysv/sysv.h | 4 +--- fs/ufs/dir.c | 13 ++++--------- fs/ufs/inode.c | 11 +++++------ fs/ufs/util.h | 4 +--- 13 files changed, 57 insertions(+), 107 deletions(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 7516957273e..6b946bae11c 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -448,6 +448,12 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child) return res; } +static int ext2_prepare_chunk(struct page *page, loff_t pos, unsigned len) +{ + return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0, + &page, NULL, ext2_get_block); +} + /* Releases the page */ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, struct page *page, struct inode *inode, int update_times) @@ -458,8 +464,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, int err; lock_page(page); - err = __ext2_write_begin(NULL, page->mapping, pos, len, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = ext2_prepare_chunk(page, pos, len); BUG_ON(err); de->inode = cpu_to_le32(inode->i_ino); ext2_set_de_type(de, inode); @@ -542,8 +547,7 @@ int
ext2_add_link (struct dentry *dentry, struct inode *inode) got_it: pos = page_offset(page) + (char*)de - (char*)page_address(page); - err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0, - &page, NULL); + err = ext2_prepare_chunk(page, pos, rec_len); if (err) goto out_unlock; if (de->inode) { @@ -576,8 +580,7 @@ out_unlock: */ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) { - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; + struct inode *inode = page->mapping->host; char *kaddr = page_address(page); unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); unsigned to = ((char *)dir - kaddr) + @@ -601,8 +604,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) from = (char*)pde - (char*)page_address(page); pos = page_offset(page) + from; lock_page(page); - err = __ext2_write_begin(NULL, page->mapping, pos, to - from, 0, - &page, NULL); + err = ext2_prepare_chunk(page, pos, to - from); BUG_ON(err); if (pde) pde->rec_len = ext2_rec_len_to_disk(to - from); @@ -621,8 +623,7 @@ out: */ int ext2_make_empty(struct inode *inode, struct inode *parent) { - struct address_space *mapping = inode->i_mapping; - struct page *page = grab_cache_page(mapping, 0); + struct page *page = grab_cache_page(inode->i_mapping, 0); unsigned chunk_size = ext2_chunk_size(inode); struct ext2_dir_entry_2 * de; int err; @@ -631,8 +632,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent) if (!page) return -ENOMEM; - err = __ext2_write_begin(NULL, page->mapping, 0, chunk_size, 0, - &page, NULL); + err = ext2_prepare_chunk(page, 0, chunk_size); if (err) { unlock_page(page); goto fail; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 52b34f1d273..8f53d11bf95 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -127,9 +127,6 @@ extern void ext2_set_inode_flags(struct inode *inode); extern void ext2_get_inode_flags(struct ext2_inode_info *); extern int ext2_fiemap(struct 
inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); -int __ext2_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata); /* ioctl.c */ extern long ext2_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 348805cd410..2f4dfbcd769 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -765,14 +765,6 @@ ext2_readpages(struct file *file, struct address_space *mapping, return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); } -int __ext2_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) -{ - return block_write_begin_newtrunc(file, mapping, pos, len, flags, - pagep, fsdata, ext2_get_block); -} - static int ext2_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, @@ -781,7 +773,8 @@ ext2_write_begin(struct file *file, struct address_space *mapping, int ret; *pagep = NULL; - ret = __ext2_write_begin(file, mapping, pos, len, flags, pagep, fsdata); + ret = block_write_begin_newtrunc(file, mapping, pos, len, flags, + pagep, fsdata, ext2_get_block); if (ret < 0) ext2_write_failed(mapping, pos + len); return ret; diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 1dbf921ca44..085a9262c69 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -271,8 +271,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode) got_it: pos = page_offset(page) + p - (char *)page_address(page); - err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = minix_prepare_chunk(page, pos, sbi->s_dirsize); if (err) goto out_unlock; memcpy (namx, name, namelen); @@ -297,8 +296,7 @@ out_unlock: int minix_delete_entry(struct minix_dir_entry *de, struct page *page) { - struct address_space *mapping = page->mapping; - struct inode 
*inode = (struct inode*)mapping->host; + struct inode *inode = page->mapping->host; char *kaddr = page_address(page); loff_t pos = page_offset(page) + (char*)de - kaddr; struct minix_sb_info *sbi = minix_sb(inode->i_sb); @@ -306,8 +304,7 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page) int err; lock_page(page); - err = __minix_write_begin(NULL, mapping, pos, len, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = minix_prepare_chunk(page, pos, len); if (err == 0) { if (sbi->s_version == MINIX_V3) ((minix3_dirent *) de)->inode = 0; @@ -325,16 +322,14 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page) int minix_make_empty(struct inode *inode, struct inode *dir) { - struct address_space *mapping = inode->i_mapping; - struct page *page = grab_cache_page(mapping, 0); + struct page *page = grab_cache_page(inode->i_mapping, 0); struct minix_sb_info *sbi = minix_sb(inode->i_sb); char *kaddr; int err; if (!page) return -ENOMEM; - err = __minix_write_begin(NULL, mapping, 0, 2 * sbi->s_dirsize, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = minix_prepare_chunk(page, 0, 2 * sbi->s_dirsize); if (err) { unlock_page(page); goto fail; @@ -425,8 +420,7 @@ not_empty: void minix_set_link(struct minix_dir_entry *de, struct page *page, struct inode *inode) { - struct address_space *mapping = page->mapping; - struct inode *dir = mapping->host; + struct inode *dir = page->mapping->host; struct minix_sb_info *sbi = minix_sb(dir->i_sb); loff_t pos = page_offset(page) + (char *)de-(char*)page_address(page); @@ -434,8 +428,7 @@ void minix_set_link(struct minix_dir_entry *de, struct page *page, lock_page(page); - err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = minix_prepare_chunk(page, pos, sbi->s_dirsize); if (err == 0) { if (sbi->s_version == MINIX_V3) ((minix3_dirent *) de)->inode = inode->i_ino; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 756f8c93780..f4abe45229b 
100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -357,12 +357,10 @@ static int minix_readpage(struct file *file, struct page *page) return block_read_full_page(page,minix_get_block); } -int __minix_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) +int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len) { - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - minix_get_block); + return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0, + &page, NULL, minix_get_block); } static int minix_write_begin(struct file *file, struct address_space *mapping, @@ -370,7 +368,8 @@ static int minix_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { *pagep = NULL; - return __minix_write_begin(file, mapping, pos, len, flags, pagep, fsdata); + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + minix_get_block); } static sector_t minix_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 111f34ee9e3..407b1c84911 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -53,9 +53,7 @@ extern int minix_new_block(struct inode * inode); extern void minix_free_block(struct inode *inode, unsigned long block); extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi); extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *); -extern int __minix_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata); +extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len); extern void V1_minix_truncate(struct inode *); extern void V2_minix_truncate(struct inode *); diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 85c89dfc71f..fc2bcfa599a 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -80,23 
+80,11 @@ static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr) return last_byte; } -static int nilfs_prepare_chunk_uninterruptible(struct page *page, - struct address_space *mapping, - unsigned from, unsigned to) +static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to) { loff_t pos = page_offset(page) + from; - return block_write_begin(NULL, mapping, pos, to - from, - AOP_FLAG_UNINTERRUPTIBLE, &page, - NULL, nilfs_get_block); -} - -static int nilfs_prepare_chunk(struct page *page, - struct address_space *mapping, - unsigned from, unsigned to) -{ - loff_t pos = page_offset(page) + from; - return block_write_begin(NULL, mapping, pos, to - from, 0, &page, - NULL, nilfs_get_block); + return block_write_begin_newtrunc(NULL, page->mapping, pos, to - from, + 0, &page, NULL, nilfs_get_block); } static void nilfs_commit_chunk(struct page *page, @@ -449,7 +437,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, int err; lock_page(page); - err = nilfs_prepare_chunk_uninterruptible(page, mapping, from, to); + err = nilfs_prepare_chunk(page, from, to); BUG_ON(err); de->inode = cpu_to_le64(inode->i_ino); nilfs_set_de_type(de, inode); @@ -530,7 +518,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) got_it: from = (char *)de - (char *)page_address(page); to = from + rec_len; - err = nilfs_prepare_chunk(page, page->mapping, from, to); + err = nilfs_prepare_chunk(page, from, to); if (err) goto out_unlock; if (de->inode) { @@ -587,7 +575,7 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) if (pde) from = (char *)pde - (char *)page_address(page); lock_page(page); - err = nilfs_prepare_chunk(page, mapping, from, to); + err = nilfs_prepare_chunk(page, from, to); BUG_ON(err); if (pde) pde->rec_len = cpu_to_le16(to - from); @@ -615,7 +603,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) if (!page) return -ENOMEM; - err = nilfs_prepare_chunk(page, mapping, 0, 
chunk_size); + err = nilfs_prepare_chunk(page, 0, chunk_size); if (unlikely(err)) { unlock_page(page); goto fail; diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 79941e4964a..a77c4215762 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -218,8 +218,7 @@ got_it: pos = page_offset(page) + (char*)de - (char*)page_address(page); lock_page(page); - err = __sysv_write_begin(NULL, page->mapping, pos, SYSV_DIRSIZE, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE); if (err) goto out_unlock; memcpy (de->name, name, namelen); @@ -239,15 +238,13 @@ out_unlock: int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page) { - struct address_space *mapping = page->mapping; - struct inode *inode = (struct inode*)mapping->host; + struct inode *inode = page->mapping->host; char *kaddr = (char*)page_address(page); loff_t pos = page_offset(page) + (char *)de - kaddr; int err; lock_page(page); - err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE); BUG_ON(err); de->inode = 0; err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); @@ -259,16 +256,14 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page) int sysv_make_empty(struct inode *inode, struct inode *dir) { - struct address_space *mapping = inode->i_mapping; - struct page *page = grab_cache_page(mapping, 0); + struct page *page = grab_cache_page(inode->i_mapping, 0); struct sysv_dir_entry * de; char *base; int err; if (!page) return -ENOMEM; - err = __sysv_write_begin(NULL, mapping, 0, 2 * SYSV_DIRSIZE, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = sysv_prepare_chunk(page, 0, 2 * SYSV_DIRSIZE); if (err) { unlock_page(page); goto fail; @@ -341,15 +336,13 @@ not_empty: void sysv_set_link(struct sysv_dir_entry *de, struct page *page, struct inode *inode) { - struct address_space *mapping = page->mapping; - struct inode *dir = mapping->host; + struct inode *dir = 
page->mapping->host; loff_t pos = page_offset(page) + (char *)de-(char*)page_address(page); int err; lock_page(page); - err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE); BUG_ON(err); de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index f042eec464c..4068f485cfd 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -459,12 +459,10 @@ static int sysv_readpage(struct file *file, struct page *page) return block_read_full_page(page,get_block); } -int __sysv_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) +int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len) { - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - get_block); + return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0, + &page, NULL, get_block); } static int sysv_write_begin(struct file *file, struct address_space *mapping, @@ -472,7 +470,8 @@ static int sysv_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { *pagep = NULL; - return __sysv_write_begin(file, mapping, pos, len, flags, pagep, fsdata); + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + get_block); } static sector_t sysv_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 94cb9b4d76c..bb55cdb394b 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -136,9 +136,7 @@ extern unsigned long sysv_count_free_blocks(struct super_block *); /* itree.c */ extern void sysv_truncate(struct inode *); -extern int __sysv_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata); +extern int 
sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len); /* inode.c */ extern struct inode *sysv_iget(struct super_block *, unsigned int); diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index ec784756dc6..dbc90994715 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -95,8 +95,7 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, int err; lock_page(page); - err = __ufs_write_begin(NULL, page->mapping, pos, len, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = ufs_prepare_chunk(page, pos, len); BUG_ON(err); de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino); @@ -381,8 +380,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode) got_it: pos = page_offset(page) + (char*)de - (char*)page_address(page); - err = __ufs_write_begin(NULL, page->mapping, pos, rec_len, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = ufs_prepare_chunk(page, pos, rec_len); if (err) goto out_unlock; if (de->d_ino) { @@ -518,7 +516,6 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir, struct page * page) { struct super_block *sb = inode->i_sb; - struct address_space *mapping = page->mapping; char *kaddr = page_address(page); unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen); @@ -549,8 +546,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir, pos = page_offset(page) + from; lock_page(page); - err = __ufs_write_begin(NULL, mapping, pos, to - from, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = ufs_prepare_chunk(page, pos, to - from); BUG_ON(err); if (pde) pde->d_reclen = cpu_to_fs16(sb, to - from); @@ -577,8 +573,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir) if (!page) return -ENOMEM; - err = __ufs_write_begin(NULL, mapping, 0, chunk_size, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = ufs_prepare_chunk(page, 0, chunk_size); if (err) { unlock_page(page); goto fail; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c 
index 73fe773aa03..a9555b1ffd2 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -558,12 +558,10 @@ static int ufs_readpage(struct file *file, struct page *page) return block_read_full_page(page,ufs_getfrag_block); } -int __ufs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) +int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len) { - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ufs_getfrag_block); + return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0, + &page, NULL, ufs_getfrag_block); } static int ufs_write_begin(struct file *file, struct address_space *mapping, @@ -571,7 +569,8 @@ static int ufs_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { *pagep = NULL; - return __ufs_write_begin(file, mapping, pos, len, flags, pagep, fsdata); + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ufs_getfrag_block); } static sector_t ufs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 23ceed8c8fb..0466036912f 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -257,9 +257,7 @@ ufs_set_inode_gid(struct super_block *sb, struct ufs_inode *inode, u32 value) extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *); extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t); -extern int __ufs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata); +extern int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len); /* * These functions manipulate ufs buffers -- cgit v1.2.3-70-g09d2 From 6e1db88d536adcbbfe562b2d4b7d6425784fff12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:29:57 +0200 Subject: introduce __block_write_begin Split up the 
block_write_begin implementation - __block_write_begin is a new trivial wrapper for block_prepare_write that always takes an already allocated page and can be either called from block_write_begin or filesystem code that already has a page allocated. Remove the handling of already allocated pages from block_write_begin after switching all callers that do it to __block_write_begin. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 69 +++++++++++++++++---------------------------- fs/ext2/dir.c | 3 +- fs/ext3/inode.c | 3 +- fs/ext4/inode.c | 11 +++----- fs/minix/inode.c | 3 +- fs/nilfs2/dir.c | 3 +- fs/reiserfs/inode.c | 3 +- fs/sysv/itree.c | 3 +- fs/ufs/inode.c | 3 +- include/linux/buffer_head.h | 2 ++ 10 files changed, 39 insertions(+), 64 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 14529ec759b..c319c49da51 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1833,9 +1833,10 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to) } EXPORT_SYMBOL(page_zero_new_buffers); -static int __block_prepare_write(struct inode *inode, struct page *page, - unsigned from, unsigned to, get_block_t *get_block) +int block_prepare_write(struct page *page, unsigned from, unsigned to, + get_block_t *get_block) { + struct inode *inode = page->mapping->host; unsigned block_start, block_end; sector_t block; int err = 0; @@ -1908,10 +1909,13 @@ static int __block_prepare_write(struct inode *inode, struct page *page, if (!buffer_uptodate(*wait_bh)) err = -EIO; } - if (unlikely(err)) + if (unlikely(err)) { page_zero_new_buffers(page, from, to); + ClearPageUptodate(page); + } return err; } +EXPORT_SYMBOL(block_prepare_write); static int __block_commit_write(struct inode *inode, struct page *page, unsigned from, unsigned to) @@ -1948,6 +1952,15 @@ static int __block_commit_write(struct inode *inode, struct page *page, return 0; } +int __block_write_begin(struct page *page, loff_t pos, unsigned len, + get_block_t *get_block) +{ + unsigned 
start = pos & (PAGE_CACHE_SIZE - 1); + + return block_prepare_write(page, start, start + len, get_block); +} +EXPORT_SYMBOL(__block_write_begin); + /* * Filesystems implementing the new truncate sequence should use the * _newtrunc postfix variant which won't incorrectly call vmtruncate. @@ -1958,41 +1971,22 @@ int block_write_begin_newtrunc(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata, get_block_t *get_block) { - struct inode *inode = mapping->host; - int status = 0; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; struct page *page; - pgoff_t index; - unsigned start, end; - int ownpage = 0; + int status; - index = pos >> PAGE_CACHE_SHIFT; - start = pos & (PAGE_CACHE_SIZE - 1); - end = start + len; - - page = *pagep; - if (page == NULL) { - ownpage = 1; - page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) { - status = -ENOMEM; - goto out; - } - *pagep = page; - } else - BUG_ON(!PageLocked(page)); + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) + return -ENOMEM; - status = __block_prepare_write(inode, page, start, end, get_block); + status = __block_write_begin(page, pos, len, get_block); if (unlikely(status)) { - ClearPageUptodate(page); - - if (ownpage) { - unlock_page(page); - page_cache_release(page); - *pagep = NULL; - } + unlock_page(page); + page_cache_release(page); + page = NULL; } -out: + *pagep = page; return status; } EXPORT_SYMBOL(block_write_begin_newtrunc); @@ -2379,17 +2373,6 @@ out: } EXPORT_SYMBOL(cont_write_begin); -int block_prepare_write(struct page *page, unsigned from, unsigned to, - get_block_t *get_block) -{ - struct inode *inode = page->mapping->host; - int err = __block_prepare_write(inode, page, from, to, get_block); - if (err) - ClearPageUptodate(page); - return err; -} -EXPORT_SYMBOL(block_prepare_write); - int block_commit_write(struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host; diff --git a/fs/ext2/dir.c 
b/fs/ext2/dir.c index 6b946bae11c..764109886ec 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -450,8 +450,7 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child) static int ext2_prepare_chunk(struct page *page, loff_t pos, unsigned len) { - return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0, - &page, NULL, ext2_get_block); + return __block_write_begin(page, pos, len, ext2_get_block); } /* Releases the page */ diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index a66f3fe3367..5c6f07eefa4 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1196,8 +1196,7 @@ retry: ret = PTR_ERR(handle); goto out; } - ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext3_get_block); + ret = __block_write_begin(page, pos, len, ext3_get_block); if (ret) goto write_begin_failed; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d6a7701018a..3da3c9646e5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1578,11 +1578,9 @@ retry: *pagep = page; if (ext4_should_dioread_nolock(inode)) - ret = block_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, ext4_get_block_write); + ret = __block_write_begin(page, pos, len, ext4_get_block_write); else - ret = block_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, ext4_get_block); + ret = __block_write_begin(page, pos, len, ext4_get_block); if (!ret && ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), @@ -1593,7 +1591,7 @@ retry: unlock_page(page); page_cache_release(page); /* - * block_write_begin may have instantiated a few blocks + * __block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need * i_size_read because we hold i_mutex. 
* @@ -3185,8 +3183,7 @@ retry: } *pagep = page; - ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext4_da_get_block_prep); + ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); if (ret < 0) { unlock_page(page); ext4_journal_stop(handle); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index f4abe45229b..6b29e73f0ca 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -359,8 +359,7 @@ static int minix_readpage(struct file *file, struct page *page) int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len) { - return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0, - &page, NULL, minix_get_block); + return __block_write_begin(page, pos, len, minix_get_block); } static int minix_write_begin(struct file *file, struct address_space *mapping, diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index fc2bcfa599a..d14e3b94d81 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -83,8 +83,7 @@ static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr) static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to) { loff_t pos = page_offset(page) + from; - return block_write_begin_newtrunc(NULL, page->mapping, pos, to - from, - 0, &page, NULL, nilfs_get_block); + return __block_write_begin(page, pos, to - from, nilfs_get_block); } static void nilfs_commit_chunk(struct page *page, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 4c1fb548ab6..045729f5674 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2585,8 +2585,7 @@ static int reiserfs_write_begin(struct file *file, old_ref = th->t_refcount; th->t_refcount++; } - ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - reiserfs_get_block); + ret = __block_write_begin(page, pos, len, reiserfs_get_block); if (ret && reiserfs_transaction_running(inode->i_sb)) { struct reiserfs_transaction_handle *th = current->journal_info; /* this gets a little ugly. 
If reiserfs_get_block returned an diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 4068f485cfd..82a005c3d7e 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -461,8 +461,7 @@ static int sysv_readpage(struct file *file, struct page *page) int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len) { - return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0, - &page, NULL, get_block); + return __block_write_begin(page, pos, len, get_block); } static int sysv_write_begin(struct file *file, struct address_space *mapping, diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index a9555b1ffd2..45ce32391f8 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -560,8 +560,7 @@ static int ufs_readpage(struct file *file, struct page *page) int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len) { - return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0, - &page, NULL, ufs_getfrag_block); + return __block_write_begin(page, pos, len, ufs_getfrag_block); } static int ufs_write_begin(struct file *file, struct address_space *mapping, diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 7638647f042..accc9f81bb6 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -209,6 +209,8 @@ int block_write_begin_newtrunc(struct file *, struct address_space *, int block_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); +int __block_write_begin(struct page *page, loff_t pos, unsigned len, + get_block_t *get_block); int block_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); -- cgit v1.2.3-70-g09d2 From 155130a4f7848b1aac439cab6bda1a175507c71c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:29:58 +0200 Subject: get rid of block_write_begin_newtrunc Move the call to vmtruncate to get rid of excess blocks to the callers in preparation for the new truncate sequence
and rename the non-truncating version to block_write_begin. While we're at it also remove several unused arguments to block_write_begin. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/bfs/file.c | 14 ++++++++--- fs/block_dev.c | 5 ++-- fs/buffer.c | 61 +++++++-------------------------------------- fs/ext2/inode.c | 5 ++-- fs/minix/inode.c | 12 +++++++-- fs/nilfs2/inode.c | 12 ++++++--- fs/nilfs2/recovery.c | 11 +++++--- fs/omfs/file.c | 14 ++++++++--- fs/sysv/itree.c | 13 +++++++--- fs/udf/inode.c | 13 +++++++--- fs/ufs/inode.c | 12 +++++++-- fs/xfs/linux-2.6/xfs_aops.c | 14 ++++++++--- include/linux/buffer_head.h | 8 ++---- 13 files changed, 103 insertions(+), 91 deletions(-) diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 88b9a3ff44e..8fc2e9c9739 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -168,9 +168,17 @@ static int bfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, - pagep, fsdata, bfs_get_block); + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, + bfs_get_block); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t bfs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/block_dev.c b/fs/block_dev.c index 65a0c26508e..63c9d607620 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -308,9 +308,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin_newtrunc(file, mapping, pos, len, flags, - pagep, fsdata, blkdev_get_block); + return block_write_begin(mapping, pos, len, flags, pagep, + blkdev_get_block); } static int blkdev_write_end(struct file *file, struct address_space 
*mapping, diff --git a/fs/buffer.c b/fs/buffer.c index c319c49da51..50efa339e05 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1962,14 +1962,13 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len, EXPORT_SYMBOL(__block_write_begin); /* - * Filesystems implementing the new truncate sequence should use the - * _newtrunc postfix variant which won't incorrectly call vmtruncate. + * block_write_begin takes care of the basic task of block allocation and + * bringing partial write blocks uptodate first. + * * The filesystem needs to handle block truncation upon failure. */ -int block_write_begin_newtrunc(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata, - get_block_t *get_block) +int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, + unsigned flags, struct page **pagep, get_block_t *get_block) { pgoff_t index = pos >> PAGE_CACHE_SHIFT; struct page *page; @@ -1989,44 +1988,6 @@ int block_write_begin_newtrunc(struct file *file, struct address_space *mapping, *pagep = page; return status; } -EXPORT_SYMBOL(block_write_begin_newtrunc); - -/* - * block_write_begin takes care of the basic task of block allocation and - * bringing partial write blocks uptodate first. - * - * If *pagep is not NULL, then block_write_begin uses the locked page - * at *pagep rather than allocating its own. In this case, the page will - * not be unlocked or deallocated on failure. - */ -int block_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata, - get_block_t *get_block) -{ - int ret; - - ret = block_write_begin_newtrunc(file, mapping, pos, len, flags, - pagep, fsdata, get_block); - - /* - * prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. 
- * - * Filesystems which pass down their own page also cannot - * call into vmtruncate here because it would lead to lock - * inversion problems (*pagep is locked). This is a further - * example of where the old truncate sequence is inadequate. - */ - if (unlikely(ret) && *pagep == NULL) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } - - return ret; -} EXPORT_SYMBOL(block_write_begin); int block_write_end(struct file *file, struct address_space *mapping, @@ -2357,7 +2318,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping, err = cont_expand_zero(file, mapping, pos, bytes); if (err) - goto out; + return err; zerofrom = *bytes & ~PAGE_CACHE_MASK; if (pos+len > *bytes && zerofrom & (blocksize-1)) { @@ -2365,11 +2326,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping, (*bytes)++; } - *pagep = NULL; - err = block_write_begin_newtrunc(file, mapping, pos, len, - flags, pagep, fsdata, get_block); -out: - return err; + return block_write_begin(mapping, pos, len, flags, pagep, get_block); } EXPORT_SYMBOL(cont_write_begin); @@ -2511,8 +2468,8 @@ int nobh_write_begin(struct address_space *mapping, unlock_page(page); page_cache_release(page); *pagep = NULL; - return block_write_begin_newtrunc(NULL, mapping, pos, len, - flags, pagep, fsdata, get_block); + return block_write_begin(mapping, pos, len, flags, pagep, + get_block); } if (PageMappedToDisk(page)) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 2f4dfbcd769..74dfe5f7333 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -772,9 +772,8 @@ ext2_write_begin(struct file *file, struct address_space *mapping, { int ret; - *pagep = NULL; - ret = block_write_begin_newtrunc(file, mapping, pos, len, flags, - pagep, fsdata, ext2_get_block); + ret = block_write_begin(mapping, pos, len, flags, pagep, + ext2_get_block); if (ret < 0) ext2_write_failed(mapping, pos + len); return ret; diff --git a/fs/minix/inode.c 
b/fs/minix/inode.c index 6b29e73f0ca..125062f55ef 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -366,9 +366,17 @@ static int minix_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, minix_get_block); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t minix_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 1dd9e6a7d78..5c694ece172 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -197,11 +197,15 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, if (unlikely(err)) return err; - *pagep = NULL; - err = block_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, nilfs_get_block); - if (unlikely(err)) + err = block_write_begin(mapping, pos, len, flags, pagep, + nilfs_get_block); + if (unlikely(err)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + nilfs_transaction_abort(inode->i_sb); + } return err; } diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index bae2a516b4e..2f11f0868d8 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -505,11 +505,14 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi, } pos = rb->blkoff << inode->i_blkbits; - page = NULL; - err = block_write_begin(NULL, inode->i_mapping, pos, blocksize, - 0, &page, NULL, nilfs_get_block); - if (unlikely(err)) + err = block_write_begin(inode->i_mapping, pos, blocksize, + 0, &page, nilfs_get_block); + if (unlikely(err)) { + loff_t isize = inode->i_size; + if (pos + blocksize > isize) + vmtruncate(inode, isize); goto failed_inode; + } err = 
nilfs_recovery_copy_block(sbi, rb, page); if (unlikely(err)) diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 6e7a3291bbe..810cff34646 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -312,9 +312,17 @@ static int omfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, - pagep, fsdata, omfs_get_block); + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, + omfs_get_block); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t omfs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 82a005c3d7e..9ca66276315 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -468,9 +468,16 @@ static int sysv_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - get_block); + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, get_block); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t sysv_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 124852bcf6f..ecddcc2ed74 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -127,9 +127,16 @@ static int udf_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - udf_get_block); + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, 
udf_get_block); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t udf_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 45ce32391f8..45cafa937a4 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -567,9 +567,17 @@ static int ufs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, ufs_getfrag_block); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t ufs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 7968d41e27a..bf7aad0d78b 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1504,9 +1504,17 @@ xfs_vm_write_begin( struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags | AOP_FLAG_NOFS, - pagep, fsdata, xfs_get_blocks); + int ret; + + ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS, + pagep, xfs_get_blocks); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } STATIC sector_t diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index accc9f81bb6..3f69054f86d 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -203,12 +203,8 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block, int block_read_full_page(struct page*, get_block_t*); int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, 
unsigned long from); -int block_write_begin_newtrunc(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page **, void **, get_block_t*); -int block_write_begin(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page **, void **, get_block_t*); +int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, + unsigned flags, struct page **pagep, get_block_t *get_block); int __block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block); int block_write_end(struct file *, struct address_space *, -- cgit v1.2.3-70-g09d2 From d39aae9ec447dda84d9a2850743a78a535a71c90 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:29:59 +0200 Subject: add missing setattr methods For the new truncate sequence every filesystem that wants to truncate on-disk state needs a setattr method. Convert the remaining filesystems that implement the truncate inode operation to have their own setattr method.
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/hfsplus/inode.c | 12 ++++++++++++ fs/minix/file.c | 12 ++++++++++++ fs/omfs/file.c | 12 ++++++++++++ fs/sysv/file.c | 12 ++++++++++++ fs/udf/file.c | 12 ++++++++++++ 5 files changed, 60 insertions(+) diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 88bf1b56264..d6ebe53fbdb 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -290,9 +290,21 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) return 0; } +static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int error; + + error = inode_change_ok(inode, attr); + if (error) + return error; + return inode_setattr(inode, attr); +} + static const struct inode_operations hfsplus_file_inode_operations = { .lookup = hfsplus_file_lookup, .truncate = hfsplus_file_truncate, + .setattr = hfsplus_setattr, .setxattr = hfsplus_setxattr, .getxattr = hfsplus_getxattr, .listxattr = hfsplus_listxattr, diff --git a/fs/minix/file.c b/fs/minix/file.c index d5320ff23fa..7a45dd1fe2e 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -23,7 +23,19 @@ const struct file_operations minix_file_operations = { .splice_read = generic_file_splice_read, }; +static int minix_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int error; + + error = inode_change_ok(inode, attr); + if (error) + return error; + return inode_setattr(inode, attr); +} + const struct inode_operations minix_file_inode_operations = { .truncate = minix_truncate, + .setattr = minix_setattr, .getattr = minix_getattr, }; diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 810cff34646..78c9f0c1a2f 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -341,7 +341,19 @@ const struct file_operations omfs_file_operations = { .splice_read = generic_file_splice_read, }; +static int omfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + 
int error; + + error = inode_change_ok(inode, attr); + if (error) + return error; + return inode_setattr(inode, attr); +} + const struct inode_operations omfs_file_inops = { + .setattr = omfs_setattr, .truncate = omfs_truncate }; diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 750cc22349b..94f6319292a 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -30,7 +30,19 @@ const struct file_operations sysv_file_operations = { .splice_read = generic_file_splice_read, }; +static int sysv_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int error; + + error = inode_change_ok(inode, attr); + if (error) + return error; + return inode_setattr(inode, attr); +} + const struct inode_operations sysv_file_inode_operations = { .truncate = sysv_truncate, + .setattr = sysv_setattr, .getattr = sysv_getattr, }; diff --git a/fs/udf/file.c b/fs/udf/file.c index 94e06d6bddb..7376032c89c 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -228,6 +228,18 @@ const struct file_operations udf_file_operations = { .llseek = generic_file_llseek, }; +static int udf_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int error; + + error = inode_change_ok(inode, attr); + if (error) + return error; + return inode_setattr(inode, attr); +} + const struct inode_operations udf_file_inode_operations = { + .setattr = udf_setattr, .truncate = udf_truncate, }; -- cgit v1.2.3-70-g09d2 From 6a1a90ad1b0edb556a7550a6ef8a8756f0304dd5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:30:00 +0200 Subject: rename generic_setattr Despite its name it's not a generic implementation of ->setattr, but rather a helper to copy attributes from a struct iattr to the inode. Rename it to setattr_copy to reflect this fact.
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/attr.c | 14 +++++++------- fs/ext2/inode.c | 2 +- fs/fat/file.c | 2 +- fs/libfs.c | 3 +-- fs/ramfs/file-nommu.c | 2 +- fs/sysfs/inode.c | 2 +- include/linux/fs.h | 2 +- mm/shmem.c | 2 +- 8 files changed, 14 insertions(+), 15 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index b4fa3b0aa59..1f6a895e24e 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -105,13 +105,13 @@ out_big: EXPORT_SYMBOL(inode_newsize_ok); /** - * generic_setattr - copy simple metadata updates into the generic inode + * setattr_copy - copy simple metadata updates into the generic inode * @inode: the inode to be updated * @attr: the new attributes * - * generic_setattr must be called with i_mutex held. + * setattr_copy must be called with i_mutex held. * - * generic_setattr updates the inode's metadata with that specified + * setattr_copy updates the inode's metadata with that specified * in attr. Noticably missing is inode size update, which is more complex * as it requires pagecache updates. See simple_setsize. * @@ -119,7 +119,7 @@ EXPORT_SYMBOL(inode_newsize_ok); * that for "simple" filesystems, the struct inode is the inode storage. * The caller is free to mark the inode dirty afterwards if needed. */ -void generic_setattr(struct inode *inode, const struct iattr *attr) +void setattr_copy(struct inode *inode, const struct iattr *attr) { unsigned int ia_valid = attr->ia_valid; @@ -144,11 +144,11 @@ void generic_setattr(struct inode *inode, const struct iattr *attr) inode->i_mode = mode; } } -EXPORT_SYMBOL(generic_setattr); +EXPORT_SYMBOL(setattr_copy); /* * note this function is deprecated, the new truncate sequence should be - * used instead -- see eg. simple_setsize, generic_setattr. + * used instead -- see eg. simple_setsize, setattr_copy. 
*/ int inode_setattr(struct inode *inode, const struct iattr *attr) { @@ -163,7 +163,7 @@ int inode_setattr(struct inode *inode, const struct iattr *attr) return error; } - generic_setattr(inode, attr); + setattr_copy(inode, attr); mark_inode_dirty(inode); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 74dfe5f7333..7dee7b3f368 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1544,7 +1544,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) if (error) return error; } - generic_setattr(inode, iattr); + setattr_copy(inode, iattr); if (iattr->ia_valid & ATTR_MODE) error = ext2_acl_chmod(inode); mark_inode_dirty(inode); diff --git a/fs/fat/file.c b/fs/fat/file.c index 990dfae022e..20813d2c7d6 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -446,7 +446,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) goto out; } - generic_setattr(inode, attr); + setattr_copy(inode, attr); mark_inode_dirty(inode); out: return error; diff --git a/fs/libfs.c b/fs/libfs.c index dcaf972cbf1..861a8879771 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -395,8 +395,7 @@ int simple_setattr(struct dentry *dentry, struct iattr *iattr) return error; } - generic_setattr(inode, iattr); - + setattr_copy(inode, iattr); return error; } EXPORT_SYMBOL(simple_setattr); diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index d532c20fc17..8d44f0347b2 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -183,7 +183,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia) } } - generic_setattr(inode, ia); + setattr_copy(inode, ia); out: ia->ia_valid = old_ia_valid; return ret; diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 0835a3b70e0..7e187fbd3d4 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -122,7 +122,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr) goto out; /* this ignores size changes */ - generic_setattr(inode, iattr); + setattr_copy(inode, iattr); out: mutex_unlock(&sysfs_mutex); diff 
--git a/include/linux/fs.h b/include/linux/fs.h index b347b2d5666..8ebb5f01a41 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2393,7 +2393,7 @@ extern int buffer_migrate_page(struct address_space *, extern int inode_change_ok(const struct inode *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); extern int __must_check inode_setattr(struct inode *, const struct iattr *); -extern void generic_setattr(struct inode *inode, const struct iattr *attr); +extern void setattr_copy(struct inode *inode, const struct iattr *attr); extern void file_update_time(struct file *file); diff --git a/mm/shmem.c b/mm/shmem.c index f65f84062db..3b58ad65d26 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -811,7 +811,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) error = inode_change_ok(inode, attr); if (!error) - generic_setattr(inode, attr); + setattr_copy(inode, attr); #ifdef CONFIG_TMPFS_POSIX_ACL if (!error && (attr->ia_valid & ATTR_MODE)) error = generic_acl_chmod(inode); -- cgit v1.2.3-70-g09d2 From eef2380c187890816b73b1a4cb89a09203759469 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:30:01 +0200 Subject: default to simple_setattr With the new truncate sequence every filesystem that wants to support file size changes on disk needs to implement its own ->setattr. So instead of calling inode_setattr which supports size changes call into a simple method that doesn't support this. simple_setattr is almost what we want except that it does not mark the inode dirty after changes. Given that marking the inode dirty is a no-op for the simple in-memory filesystems that use simple_setattr currently just add the mark_inode_dirty call. Also add a WARN_ON for the presence of a truncate method to simple_setattr to catch new instances of it during the transition period. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/attr.c | 9 +++------ fs/libfs.c | 16 +++++++++++----- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index 1f6a895e24e..aeac826f477 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -237,13 +237,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr) if (ia_valid & ATTR_SIZE) down_write(&dentry->d_inode->i_alloc_sem); - if (inode->i_op && inode->i_op->setattr) { + if (inode->i_op->setattr) error = inode->i_op->setattr(dentry, attr); - } else { - error = inode_change_ok(inode, attr); - if (!error) - error = inode_setattr(inode, attr); - } + else + error = simple_setattr(dentry, attr); if (ia_valid & ATTR_SIZE) up_write(&dentry->d_inode->i_alloc_sem); diff --git a/fs/libfs.c b/fs/libfs.c index 861a8879771..40562224b71 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -370,21 +370,26 @@ int simple_setsize(struct inode *inode, loff_t newsize) EXPORT_SYMBOL(simple_setsize); /** - * simple_setattr - setattr for simple in-memory filesystem + * simple_setattr - setattr for simple filesystem * @dentry: dentry * @iattr: iattr structure * * Returns 0 on success, -error on failure. * - * simple_setattr implements setattr for an in-memory filesystem which - * does not store its own file data or metadata (eg. uses the page cache - * and inode cache as its data store). + * simple_setattr is a simple ->setattr implementation without a proper + * implementation of size changes. + * + * It can either be used for in-memory filesystems or special files + * on simple regular filesystems. Anything that needs to change on-disk + * or wire state on size changes needs its own setattr method. 
*/ int simple_setattr(struct dentry *dentry, struct iattr *iattr) { struct inode *inode = dentry->d_inode; int error; + WARN_ON_ONCE(inode->i_op->truncate); + error = inode_change_ok(inode, iattr); if (error) return error; @@ -396,7 +401,8 @@ int simple_setattr(struct dentry *dentry, struct iattr *iattr) } setattr_copy(inode, iattr); - return error; + mark_inode_dirty(inode); + return 0; } EXPORT_SYMBOL(simple_setattr); -- cgit v1.2.3-70-g09d2 From 1025774ce411f2bd4b059ad7b53f0003569b74fa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:30:02 +0200 Subject: remove inode_setattr Replace inode_setattr with opencoded variants of it in all callers. This moves the remaining call to vmtruncate into the filesystem methods where it can be replaced with the proper truncate sequence. In a few cases it was obvious that we would never end up calling vmtruncate so it was left out in the opencoded variant: spufs: explicitly checks for ATTR_SIZE earlier btrfs,hugetlbfs,logfs,dlmfs: explicitly clears ATTR_SIZE earlier ufs: contains an opencoded simple_setattr + truncate that sets the filesize just above In addition to that ncpfs called inode_setattr with handcrafted iattrs, which allowed us to trim down the opencoded variant.
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/inode.c | 4 +- drivers/staging/pohmelfs/inode.c | 14 +++-- fs/9p/vfs_inode.c | 15 ++++- fs/affs/inode.c | 13 ++++- fs/attr.c | 25 -------- fs/btrfs/inode.c | 12 ++-- fs/cifs/inode.c | 45 ++++++++++---- fs/exofs/inode.c | 14 ++++- fs/ext3/inode.c | 12 +++- fs/ext4/inode.c | 16 +++-- fs/gfs2/inode.c | 25 +++++--- fs/gfs2/ops_inode.c | 12 +++- fs/gfs2/xattr.c | 24 ++++++-- fs/hfs/inode.c | 12 +++- fs/hfsplus/inode.c | 12 +++- fs/hostfs/hostfs_kern.c | 18 +++++- fs/hpfs/inode.c | 12 +++- fs/hugetlbfs/inode.c | 17 +++--- fs/jfs/file.c | 14 ++++- fs/logfs/file.c | 18 +++--- fs/minix/file.c | 12 +++- fs/ncpfs/inode.c | 24 ++++---- fs/nilfs2/inode.c | 25 ++++++-- fs/ntfs/inode.c | 3 - fs/ocfs2/dlmfs/dlmfs.c | 8 ++- fs/ocfs2/file.c | 16 +++-- fs/omfs/file.c | 12 +++- fs/proc/base.c | 16 ++++- fs/proc/generic.c | 18 ++++-- fs/proc/proc_sysctl.c | 15 ++++- fs/reiserfs/inode.c | 97 +++++++++++++++++-------------- fs/sysv/file.c | 12 +++- fs/udf/file.c | 12 +++- fs/ufs/truncate.c | 5 +- include/linux/fs.h | 1 - 35 files changed, 416 insertions(+), 194 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index e5e5f823d68..32625f366fb 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -110,7 +110,9 @@ spufs_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && (attr->ia_size != inode->i_size)) return -EINVAL; - return inode_setattr(inode, attr); + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c index 643b413d9f0..e818f53ccfd 100644 --- a/drivers/staging/pohmelfs/inode.c +++ b/drivers/staging/pohmelfs/inode.c @@ -968,12 +968,18 @@ int pohmelfs_setattr_raw(struct inode *inode, struct iattr *attr) goto err_out_exit; } - err = inode_setattr(inode, 
attr); - if (err) { - dprintk("%s: ino: %llu, failed to set the attributes.\n", __func__, POHMELFS_I(inode)->ino); - goto err_out_exit; + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + err = vmtruncate(inode, attr->ia_size); + if (err) { + dprintk("%s: ino: %llu, failed to set the attributes.\n", __func__, POHMELFS_I(inode)->ino); + goto err_out_exit; + } } + setattr_copy(inode, attr); + mark_inode_dirty(inode); + dprintk("%s: ino: %llu, mode: %o -> %o, uid: %u -> %u, gid: %u -> %u, size: %llu -> %llu.\n", __func__, POHMELFS_I(inode)->ino, inode->i_mode, attr->ia_mode, inode->i_uid, attr->ia_uid, inode->i_gid, attr->ia_gid, inode->i_size, attr->ia_size); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 4331b3b5ee1..4b3ad6ac9a4 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -896,10 +896,19 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) } retval = p9_client_wstat(fid, &wstat); - if (retval >= 0) - retval = inode_setattr(dentry->d_inode, iattr); + if (retval < 0) + return retval; + + if ((iattr->ia_valid & ATTR_SIZE) && + iattr->ia_size != i_size_read(dentry->d_inode)) { + retval = vmtruncate(dentry->d_inode, iattr->ia_size); + if (retval) + return retval; + } - return retval; + setattr_copy(dentry->d_inode, iattr); + mark_inode_dirty(dentry->d_inode); + return 0; } /** diff --git a/fs/affs/inode.c b/fs/affs/inode.c index f4b2a4ee4f9..6883d5fb84c 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -235,8 +235,17 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr) goto out; } - error = inode_setattr(inode, attr); - if (!error && (attr->ia_valid & ATTR_MODE)) + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + + if (attr->ia_valid & ATTR_MODE) mode_to_prot(inode); out: return error; diff --git a/fs/attr.c b/fs/attr.c 
index aeac826f477..ed44d8ae8bf 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -146,31 +146,6 @@ void setattr_copy(struct inode *inode, const struct iattr *attr) } EXPORT_SYMBOL(setattr_copy); -/* - * note this function is deprecated, the new truncate sequence should be - * used instead -- see eg. simple_setsize, setattr_copy. - */ -int inode_setattr(struct inode *inode, const struct iattr *attr) -{ - unsigned int ia_valid = attr->ia_valid; - - if (ia_valid & ATTR_SIZE && - attr->ia_size != i_size_read(inode)) { - int error; - - error = vmtruncate(inode, attr->ia_size); - if (error) - return error; - } - - setattr_copy(inode, attr); - - mark_inode_dirty(inode); - - return 0; -} -EXPORT_SYMBOL(inode_setattr); - int notify_change(struct dentry * dentry, struct iattr * attr) { struct inode *inode = dentry->d_inode; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1bff92ad474..7f9e0536db1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3656,13 +3656,15 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; } - attr->ia_valid &= ~ATTR_SIZE; - if (attr->ia_valid) - err = inode_setattr(inode, attr); + if (attr->ia_valid) { + setattr_copy(inode, attr); + mark_inode_dirty(inode); + + if (attr->ia_valid & ATTR_MODE) + err = btrfs_acl_chmod(inode); + } - if (!err && ((attr->ia_valid & ATTR_MODE))) - err = btrfs_acl_chmod(inode); return err; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a15b3a9bbff..9c6a40f5cc5 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1889,18 +1889,27 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) CIFS_MOUNT_MAP_SPECIAL_CHR); } - if (!rc) { - rc = inode_setattr(inode, attrs); + if (rc) + goto out; - /* force revalidate when any of these times are set since some - of the fs types (eg ext3, fat) do not have fine enough - time granularity to match protocol, and we do not have a - a way (yet) to query the server fs's time granularity (and - whether it rounds times down). 
- */ - if (!rc && (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME))) - cifsInode->time = 0; + if ((attrs->ia_valid & ATTR_SIZE) && + attrs->ia_size != i_size_read(inode)) { + rc = vmtruncate(inode, attrs->ia_size); + if (rc) + goto out; } + + setattr_copy(inode, attrs); + mark_inode_dirty(inode); + + /* force revalidate when any of these times are set since some + of the fs types (eg ext3, fat) do not have fine enough + time granularity to match protocol, and we do not have a + a way (yet) to query the server fs's time granularity (and + whether it rounds times down). + */ + if (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME)) + cifsInode->time = 0; out: kfree(args); kfree(full_path); @@ -2040,8 +2049,20 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) /* do not need local check to inode_check_ok since the server does that */ - if (!rc) - rc = inode_setattr(inode, attrs); + if (rc) + goto cifs_setattr_exit; + + if ((attrs->ia_valid & ATTR_SIZE) && + attrs->ia_size != i_size_read(inode)) { + rc = vmtruncate(inode, attrs->ia_size); + if (rc) + goto cifs_setattr_exit; + } + + setattr_copy(inode, attrs); + mark_inode_dirty(inode); + return 0; + cifs_setattr_exit: kfree(full_path); FreeXid(xid); diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 4bb6ef822e4..4bfc1f4fd01 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -887,8 +887,18 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr) if (error) return error; - error = inode_setattr(inode, iattr); - return error; + if ((iattr->ia_valid & ATTR_SIZE) && + iattr->ia_size != i_size_read(inode)) { + int error; + + error = vmtruncate(inode, iattr->ia_size); + if (error) + return error; + } + + setattr_copy(inode, iattr); + mark_inode_dirty(inode); + return 0; } static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF( diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 5c6f07eefa4..b04d1193668 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3208,9 +3208,17 @@ int 
ext3_setattr(struct dentry *dentry, struct iattr *attr) ext3_journal_stop(handle); } - rc = inode_setattr(inode, attr); + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + rc = vmtruncate(inode, attr->ia_size); + if (rc) + goto err_out; + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); - if (!rc && (ia_valid & ATTR_MODE)) + if (ia_valid & ATTR_MODE) rc = ext3_acl_chmod(inode); err_out: diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3da3c9646e5..1fb390359bc 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5539,11 +5539,19 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ext4_truncate(inode); } - rc = inode_setattr(inode, attr); + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) + rc = vmtruncate(inode, attr->ia_size); - /* If inode_setattr's call to ext4_truncate failed to get a - * transaction handle at all, we need to clean up the in-core - * orphan list manually. */ + if (!rc) { + setattr_copy(inode, attr); + mark_inode_dirty(inode); + } + + /* + * If the call to ext4_truncate failed to get a transaction handle at + * all, we need to clean up the in-core orphan list manually. 
+ */ if (inode->i_nlink) ext4_orphan_del(NULL, inode); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index f03afd9c44b..6c023a3b5d2 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -991,18 +991,29 @@ fail: static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) { + struct inode *inode = &ip->i_inode; struct buffer_head *dibh; int error; error = gfs2_meta_inode_buffer(ip, &dibh); - if (!error) { - error = inode_setattr(&ip->i_inode, attr); - gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); + if (error) + return error; + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; } - return error; + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + + gfs2_assert_warn(GFS2_SB(inode), !error); + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + return 0; } /** diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 98cdd05f331..d7d410a4ca4 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -1136,8 +1136,16 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (error) goto out_end_trans; - error = inode_setattr(inode, attr); - gfs2_assert_warn(sdp, !error); + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + int error; + + error = vmtruncate(inode, attr->ia_size); + gfs2_assert_warn(sdp, !error); + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 82f93da00d1..776af6eb4bc 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1296,6 +1296,7 @@ fail: int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) { + struct inode *inode = &ip->i_inode; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 
struct gfs2_ea_location el; struct buffer_head *dibh; @@ -1321,14 +1322,25 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) return error; error = gfs2_meta_inode_buffer(ip, &dibh); - if (!error) { - error = inode_setattr(&ip->i_inode, attr); - gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); + if (error) + goto out_trans_end; + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + int error; + + error = vmtruncate(inode, attr->ia_size); + gfs2_assert_warn(GFS2_SB(inode), !error); } + setattr_copy(inode, attr); + mark_inode_dirty(inode); + + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + +out_trans_end: gfs2_trans_end(sdp); return error; } diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 8df18e63eb6..87de671baa8 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -612,10 +612,16 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) attr->ia_mode = inode->i_mode & ~S_IWUGO; attr->ia_mode &= S_ISDIR(inode->i_mode) ? 
~hsb->s_dir_umask: ~hsb->s_file_umask; } - error = inode_setattr(inode, attr); - if (error) - return error; + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); return 0; } diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index d6ebe53fbdb..654c5a8ddf1 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -298,7 +298,17 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) error = inode_change_ok(inode, attr); if (error) return error; - return inode_setattr(inode, attr); + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } static const struct inode_operations hfsplus_file_inode_operations = { diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 87ac1891a18..7943ff11d48 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -849,13 +849,14 @@ int hostfs_permission(struct inode *ino, int desired) int hostfs_setattr(struct dentry *dentry, struct iattr *attr) { + struct inode *inode = dentry->d_inode; struct hostfs_iattr attrs; char *name; int err; - int fd = HOSTFS_I(dentry->d_inode)->fd; + int fd = HOSTFS_I(inode)->fd; - err = inode_change_ok(dentry->d_inode, attr); + err = inode_change_ok(inode, attr); if (err) return err; @@ -905,7 +906,18 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - return inode_setattr(dentry->d_inode, attr); + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + int error; + + error = vmtruncate(inode, attr->ia_size); + if (err) + return err; + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } static const struct inode_operations hostfs_iops = { diff --git 
a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 1042a9bc97f..3f3b397fd4e 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -277,9 +277,15 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr) if (error) goto out_unlock; - error = inode_setattr(inode, attr); - if (error) - goto out_unlock; + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); hpfs_write_inode(inode); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a4e9a7ec369..d5f019d48b0 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -448,19 +448,20 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) error = inode_change_ok(inode, attr); if (error) - goto out; + return error; if (ia_valid & ATTR_SIZE) { error = -EINVAL; - if (!(attr->ia_size & ~huge_page_mask(h))) - error = hugetlb_vmtruncate(inode, attr->ia_size); + if (attr->ia_size & ~huge_page_mask(h)) + return -EINVAL; + error = hugetlb_vmtruncate(inode, attr->ia_size); if (error) - goto out; - attr->ia_valid &= ~ATTR_SIZE; + return error; } - error = inode_setattr(inode, attr); -out: - return error; + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 127263cc865..c5ce6c1d1ff 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -17,6 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include "jfs_incore.h" @@ -107,11 +108,18 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr) return rc; } - rc = inode_setattr(inode, iattr); + if ((iattr->ia_valid & ATTR_SIZE) && + iattr->ia_size != i_size_read(inode)) { + rc = vmtruncate(inode, iattr->ia_size); + if (rc) + return rc; + } - if (!rc && (iattr->ia_valid & ATTR_MODE)) - rc = 
jfs_acl_chmod(inode); + setattr_copy(inode, iattr); + mark_inode_dirty(inode); + if (iattr->ia_valid & ATTR_MODE) + rc = jfs_acl_chmod(inode); return rc; } diff --git a/fs/logfs/file.c b/fs/logfs/file.c index abe1cafbd4c..23b4d03bbd2 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -232,15 +232,19 @@ static int logfs_setattr(struct dentry *dentry, struct iattr *attr) struct inode *inode = dentry->d_inode; int err = 0; - if (attr->ia_valid & ATTR_SIZE) + if (attr->ia_valid & ATTR_SIZE) { err = logfs_truncate(inode, attr->ia_size); - attr->ia_valid &= ~ATTR_SIZE; + if (err) + return err; + } - if (!err) - err = inode_change_ok(inode, attr); - if (!err) - err = inode_setattr(inode, attr); - return err; + err = inode_change_ok(inode, attr); + if (err) + return err; + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } const struct inode_operations logfs_reg_iops = { diff --git a/fs/minix/file.c b/fs/minix/file.c index 7a45dd1fe2e..4493ce695ab 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -31,7 +31,17 @@ static int minix_setattr(struct dentry *dentry, struct iattr *attr) error = inode_change_ok(inode, attr); if (error) return error; - return inode_setattr(inode, attr); + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } const struct inode_operations minix_file_inode_operations = { diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index fa338515402..b4e8aaae14b 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -924,9 +924,8 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) tmpattr.ia_valid = ATTR_MODE; tmpattr.ia_mode = attr->ia_mode; - result = inode_setattr(inode, &tmpattr); - if (result) - goto out; + setattr_copy(inode, &tmpattr); + mark_inode_dirty(inode); } } #endif @@ -954,15 +953,12 @@ int ncp_notify_change(struct dentry *dentry, 
struct iattr *attr) result = ncp_make_closed(inode); if (result) goto out; - { - struct iattr tmpattr; - - tmpattr.ia_valid = ATTR_SIZE; - tmpattr.ia_size = attr->ia_size; - - result = inode_setattr(inode, &tmpattr); + + if (attr->ia_size != i_size_read(inode)) { + result = vmtruncate(inode, attr->ia_size); if (result) goto out; + mark_inode_dirty(inode); } } if ((attr->ia_valid & ATTR_CTIME) != 0) { @@ -1002,8 +998,12 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) NCP_FINFO(inode)->nwattr = info.attributes; #endif } - if (!result) - result = inode_setattr(inode, attr); + if (result) + goto out; + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + out: unlock_kernel(); return result; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 5c694ece172..051d279abb3 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -656,14 +656,27 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) err = nilfs_transaction_begin(sb, &ti, 0); if (unlikely(err)) return err; - err = inode_setattr(inode, iattr); - if (!err && (iattr->ia_valid & ATTR_MODE)) + + if ((iattr->ia_valid & ATTR_SIZE) && + iattr->ia_size != i_size_read(inode)) { + err = vmtruncate(inode, iattr->ia_size); + if (unlikely(err)) + goto out_err; + } + + setattr_copy(inode, iattr); + mark_inode_dirty(inode); + + if (iattr->ia_valid & ATTR_MODE) { err = nilfs_acl_chmod(inode); - if (likely(!err)) - err = nilfs_transaction_commit(sb); - else - nilfs_transaction_abort(sb); + if (unlikely(err)) + goto out_err; + } + + return nilfs_transaction_commit(sb); +out_err: + nilfs_transaction_abort(sb); return err; } diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 4b57fb1eac2..fdef8f729c3 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2879,9 +2879,6 @@ void ntfs_truncate_vfs(struct inode *vi) { * * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also * called with ->i_alloc_sem held for writing. 
- * - * Basically this is a copy of generic notify_change() and inode_setattr() - * functionality, except we intercept and abort changes in i_size. */ int ntfs_setattr(struct dentry *dentry, struct iattr *attr) { diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index b83d6107a1f..85e4ccaedd1 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -214,10 +214,12 @@ static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid &= ~ATTR_SIZE; error = inode_change_ok(inode, attr); - if (!error) - error = inode_setattr(inode, attr); + if (error) + return error; - return error; + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 2b10b36d157..584cf8ac167 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1238,13 +1238,21 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) * Otherwise, we could get into problems with truncate as * ip_alloc_sem is used there to protect against i_size * changes. + * + * XXX: this means the conditional below can probably be removed. 
*/ - status = inode_setattr(inode, attr); - if (status < 0) { - mlog_errno(status); - goto bail_commit; + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + status = vmtruncate(inode, attr->ia_size); + if (status) { + mlog_errno(status); + goto bail_commit; + } } + setattr_copy(inode, attr); + mark_inode_dirty(inode); + status = ocfs2_mark_inode_dirty(handle, inode, bh); if (status < 0) mlog_errno(status); diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 78c9f0c1a2f..5542c284dc1 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -349,7 +349,17 @@ static int omfs_setattr(struct dentry *dentry, struct iattr *attr) error = inode_change_ok(inode, attr); if (error) return error; - return inode_setattr(inode, attr); + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } const struct inode_operations omfs_file_inops = { diff --git a/fs/proc/base.c b/fs/proc/base.c index acb7ef80ea4..a49d9dd06d1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -561,9 +561,19 @@ static int proc_setattr(struct dentry *dentry, struct iattr *attr) return -EPERM; error = inode_change_ok(inode, attr); - if (!error) - error = inode_setattr(inode, attr); - return error; + if (error) + return error; + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } static const struct inode_operations proc_def_inode_operations = { diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 2791907744e..dd29f033766 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -258,17 +259,22 @@ static int proc_notify_change(struct dentry *dentry, struct iattr 
*iattr) error = inode_change_ok(inode, iattr); if (error) - goto out; + return error; - error = inode_setattr(inode, iattr); - if (error) - goto out; + if ((iattr->ia_valid & ATTR_SIZE) && + iattr->ia_size != i_size_read(inode)) { + error = vmtruncate(inode, iattr->ia_size); + if (error) + return error; + } + + setattr_copy(inode, iattr); + mark_inode_dirty(inode); de->uid = inode->i_uid; de->gid = inode->i_gid; de->mode = inode->i_mode; -out: - return error; + return 0; } static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 6ff9981f0a1..5be436ea088 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -329,10 +329,19 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) return -EPERM; error = inode_change_ok(inode, attr); - if (!error) - error = inode_setattr(inode, attr); + if (error) + return error; + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } - return error; + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 045729f5674..2b8dc5c2286 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3134,55 +3134,62 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) } error = inode_change_ok(inode, attr); - if (!error) { - if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { - error = reiserfs_chown_xattrs(inode, attr); + if (error) + goto out; - if (!error) { - struct reiserfs_transaction_handle th; - int jbegin_count = - 2 * - (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + - REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + - 2; - - /* (user+group)*(old+new) structure - we count quota info and , inode 
write (sb, inode) */ - error = - journal_begin(&th, inode->i_sb, - jbegin_count); - if (error) - goto out; - error = dquot_transfer(inode, attr); - if (error) { - journal_end(&th, inode->i_sb, - jbegin_count); - goto out; - } - /* Update corresponding info in inode so that everything is in - * one transaction */ - if (attr->ia_valid & ATTR_UID) - inode->i_uid = attr->ia_uid; - if (attr->ia_valid & ATTR_GID) - inode->i_gid = attr->ia_gid; - mark_inode_dirty(inode); - error = - journal_end(&th, inode->i_sb, jbegin_count); - } - } - if (!error) { - /* - * Relax the lock here, as it might truncate the - * inode pages and wait for inode pages locks. - * To release such page lock, the owner needs the - * reiserfs lock - */ - reiserfs_write_unlock_once(inode->i_sb, depth); - error = inode_setattr(inode, attr); - depth = reiserfs_write_lock_once(inode->i_sb); + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + struct reiserfs_transaction_handle th; + int jbegin_count = + 2 * + (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + + REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + + 2; + + error = reiserfs_chown_xattrs(inode, attr); + + if (error) + return error; + + /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ + error = journal_begin(&th, inode->i_sb, jbegin_count); + if (error) + goto out; + error = dquot_transfer(inode, attr); + if (error) { + journal_end(&th, inode->i_sb, jbegin_count); + goto out; } + + /* Update corresponding info in inode so that everything is in + * one transaction */ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + mark_inode_dirty(inode); + error = journal_end(&th, inode->i_sb, jbegin_count); + if (error) + goto out; } + /* + * Relax the lock here, as it might truncate the + * inode pages and wait for inode pages locks. 
+ * To release such page lock, the owner needs the + * reiserfs lock + */ + reiserfs_write_unlock_once(inode->i_sb, depth); + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) + error = vmtruncate(inode, attr->ia_size); + + if (!error) { + setattr_copy(inode, attr); + mark_inode_dirty(inode); + } + depth = reiserfs_write_lock_once(inode->i_sb); + if (!error && reiserfs_posixacl(inode->i_sb)) { if (attr->ia_valid & ATTR_MODE) error = reiserfs_acl_chmod(inode); diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 94f6319292a..0a65939508e 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -38,7 +38,17 @@ static int sysv_setattr(struct dentry *dentry, struct iattr *attr) error = inode_change_ok(inode, attr); if (error) return error; - return inode_setattr(inode, attr); + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } const struct inode_operations sysv_file_inode_operations = { diff --git a/fs/udf/file.c b/fs/udf/file.c index 7376032c89c..04bb5bf0763 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -236,7 +236,17 @@ static int udf_setattr(struct dentry *dentry, struct iattr *attr) error = inode_change_ok(inode, attr); if (error) return error; - return inode_setattr(inode, attr); + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } const struct inode_operations udf_file_inode_operations = { diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 589e01a465b..085e11623b7 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -525,7 +525,10 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; } - return inode_setattr(inode, attr); + + setattr_copy(inode, 
attr); + mark_inode_dirty(inode); + return 0; } const struct inode_operations ufs_file_inode_operations = { diff --git a/include/linux/fs.h b/include/linux/fs.h index 8ebb5f01a41..6ecb83c00a6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2392,7 +2392,6 @@ extern int buffer_migrate_page(struct address_space *, extern int inode_change_ok(const struct inode *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); -extern int __must_check inode_setattr(struct inode *, const struct iattr *); extern void setattr_copy(struct inode *inode, const struct iattr *attr); extern void file_update_time(struct file *file); -- cgit v1.2.3-70-g09d2 From db78b877f7744bec4a9d9f9e7d10da3931d7cd39 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:30:03 +0200 Subject: always call inode_change_ok early in ->setattr Make sure we call inode_change_ok before doing any changes in ->setattr, and make sure to call it even if our fs wants to ignore normal UNIX permissions, but use the ATTR_FORCE to skip those. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/cifs/inode.c | 29 +++++++++++++---------------- fs/fat/file.c | 30 +++++++++++++++--------------- fs/fuse/dir.c | 11 ++++++----- fs/logfs/file.c | 8 ++++---- fs/reiserfs/inode.c | 8 ++++---- mm/shmem.c | 10 ++++++---- 6 files changed, 48 insertions(+), 48 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9c6a40f5cc5..b95f4a5af01 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1796,14 +1796,12 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) xid = GetXid(); - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { - /* check if we have permission to change attrs */ - rc = inode_change_ok(inode, attrs); - if (rc < 0) - goto out; - else - rc = 0; - } + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) + attrs->ia_valid |= ATTR_FORCE; + + rc = inode_change_ok(inode, attrs); + if (rc < 0) + goto out; full_path = build_path_from_dentry(direntry); if (full_path == NULL) { @@ -1934,14 +1932,13 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) cFYI(1, "setattr on file %s attrs->iavalid 0x%x", direntry->d_name.name, attrs->ia_valid); - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { - /* check if we have permission to change attrs */ - rc = inode_change_ok(inode, attrs); - if (rc < 0) { - FreeXid(xid); - return rc; - } else - rc = 0; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) + attrs->ia_valid |= ATTR_FORCE; + + rc = inode_change_ok(inode, attrs); + if (rc < 0) { + FreeXid(xid); + return rc; } full_path = build_path_from_dentry(direntry); diff --git a/fs/fat/file.c b/fs/fat/file.c index 20813d2c7d6..b2eedcee751 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -387,21 +387,6 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) unsigned int ia_valid; int error; - /* - * Expand the file. Since inode_setattr() updates ->i_size - * before calling the ->truncate(), but FAT needs to fill the - * hole before it. 
XXX: this is no longer true with new truncate - * sequence. - */ - if (attr->ia_valid & ATTR_SIZE) { - if (attr->ia_size > inode->i_size) { - error = fat_cont_expand(inode, attr->ia_size); - if (error || attr->ia_valid == ATTR_SIZE) - goto out; - attr->ia_valid &= ~ATTR_SIZE; - } - } - /* Check for setting the inode time. */ ia_valid = attr->ia_valid; if (ia_valid & TIMES_SET_FLAGS) { @@ -417,6 +402,21 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) goto out; } + /* + * Expand the file. Since inode_setattr() updates ->i_size + * before calling the ->truncate(), but FAT needs to fill the + * hole before it. XXX: this is no longer true with new truncate + * sequence. + */ + if (attr->ia_valid & ATTR_SIZE) { + if (attr->ia_size > inode->i_size) { + error = fat_cont_expand(inode, attr->ia_size); + if (error || attr->ia_valid == ATTR_SIZE) + goto out; + attr->ia_valid &= ~ATTR_SIZE; + } + } + if (((attr->ia_valid & ATTR_UID) && (attr->ia_uid != sbi->options.fs_uid)) || ((attr->ia_valid & ATTR_GID) && diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 3cdc5f78a40..43a9b3730a9 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1270,11 +1270,12 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, if (!fuse_allow_task(fc, current)) return -EACCES; - if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { - err = inode_change_ok(inode, attr); - if (err) - return err; - } + if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) + attr->ia_valid |= ATTR_FORCE; + + err = inode_change_ok(inode, attr); + if (err) + return err; if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) return 0; diff --git a/fs/logfs/file.c b/fs/logfs/file.c index 23b4d03bbd2..4dd0f7c06e3 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -232,16 +232,16 @@ static int logfs_setattr(struct dentry *dentry, struct iattr *attr) struct inode *inode = dentry->d_inode; int err = 0; + err = inode_change_ok(inode, attr); + if (err) + return err; + if (attr->ia_valid & ATTR_SIZE) { err = 
logfs_truncate(inode, attr->ia_size); if (err) return err; } - err = inode_change_ok(inode, attr); - if (err) - return err; - setattr_copy(inode, attr); mark_inode_dirty(inode); return 0; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 2b8dc5c2286..46ba1cfc2df 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3084,6 +3084,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) int depth; int error; + error = inode_change_ok(inode, attr); + if (error) + return error; + /* must be turned off for recursive notify_change calls */ ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); @@ -3133,10 +3137,6 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) goto out; } - error = inode_change_ok(inode, attr); - if (error) - goto out; - if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { struct reiserfs_transaction_handle th; diff --git a/mm/shmem.c b/mm/shmem.c index 3b58ad65d26..0a43505eeae 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -767,6 +767,10 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) loff_t newsize = attr->ia_size; int error; + error = inode_change_ok(inode, attr); + if (error) + return error; + if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE) && newsize != inode->i_size) { struct page *page = NULL; @@ -809,11 +813,9 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) shmem_truncate_range(inode, newsize, (loff_t)-1); } - error = inode_change_ok(inode, attr); - if (!error) - setattr_copy(inode, attr); + setattr_copy(inode, attr); #ifdef CONFIG_TMPFS_POSIX_ACL - if (!error && (attr->ia_valid & ATTR_MODE)) + if (attr->ia_valid & ATTR_MODE) error = generic_acl_chmod(inode); #endif return error; -- cgit v1.2.3-70-g09d2 From 2c27c65ed0696f0b5df2dad2cf6462d72164d547 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:30:04 +0200 Subject: check ATTR_SIZE 
constraints in inode_change_ok Make sure we check the truncate constraints early on in ->setattr by adding those checks to inode_change_ok. Also clean up and document inode_change_ok to make this obvious. As a fallout we don't have to call inode_newsize_ok from simple_setsize and simplify it down to a truncate_setsize which doesn't return an error. This simplifies a lot of setattr implementations and means we use truncate_setsize almost everywhere. Get rid of fat_setsize now that it's trivial and mark ext2_setsize static to make the calling convention obvious. Keep the inode_newsize_ok in vmtruncate for now as all callers need an audit for its removal anyway. Note: setattr code in ecryptfs doesn't call inode_change_ok at all and needs a deeper audit, but that is left for later. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/adfs/inode.c | 5 +---- fs/attr.c | 44 ++++++++++++++++++++++++++++++-------------- fs/ecryptfs/inode.c | 18 ++++++++++++++---- fs/ext2/inode.c | 12 ++---------- fs/fat/fat.h | 1 - fs/fat/file.c | 17 ++--------------- fs/fuse/dir.c | 6 +----- fs/gfs2/aops.c | 4 ++-- fs/gfs2/ops_inode.c | 6 ++---- fs/jffs2/fs.c | 4 ++-- fs/libfs.c | 51 ++------------------------------------------------- fs/ocfs2/file.c | 6 +++--- fs/ramfs/file-nommu.c | 5 ++--- fs/smbfs/inode.c | 4 +--- fs/ubifs/file.c | 23 ++++++++--------------- fs/ubifs/ubifs.h | 2 +- fs/ufs/truncate.c | 11 +++-------- include/linux/fs.h | 1 - include/linux/mm.h | 1 + mm/shmem.c | 5 ++--- mm/truncate.c | 38 +++++++++++++++++++++++++++++--------- 21 files changed, 108 insertions(+), 156 deletions(-) diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index b3dec193036..65794b8fe79 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -333,10 +333,7 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr) /* XXX: this is missing some actual on-disk truncation..
*/ if (ia_valid & ATTR_SIZE) - error = simple_setsize(inode, attr->ia_size); - - if (error) - goto out; + truncate_setsize(inode, attr->ia_size); if (ia_valid & ATTR_MTIME) { inode->i_mtime = attr->ia_mtime; diff --git a/fs/attr.c b/fs/attr.c index ed44d8ae8bf..7ca41811afa 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -14,35 +14,53 @@ #include #include -/* Taken over from the old code... */ - -/* POSIX UID/GID verification for setting inode attributes. */ +/** + * inode_change_ok - check if attribute changes to an inode are allowed + * @inode: inode to check + * @attr: attributes to change + * + * Check if we are allowed to change the attributes contained in @attr + * in the given inode. This includes the normal unix access permission + * checks, as well as checks for rlimits and others. + * + * Should be called as the first thing in ->setattr implementations, + * possibly after taking additional locks. + */ int inode_change_ok(const struct inode *inode, struct iattr *attr) { - int retval = -EPERM; unsigned int ia_valid = attr->ia_valid; + /* + * First check size constraints. These can't be overriden using + * ATTR_FORCE. + */ + if (ia_valid & ATTR_SIZE) { + int error = inode_newsize_ok(inode, attr->ia_size); + if (error) + return error; + } + /* If force is set do it anyway. */ if (ia_valid & ATTR_FORCE) - goto fine; + return 0; /* Make sure a caller can chown. */ if ((ia_valid & ATTR_UID) && (current_fsuid() != inode->i_uid || attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN)) - goto error; + return -EPERM; /* Make sure caller can chgrp. */ if ((ia_valid & ATTR_GID) && (current_fsuid() != inode->i_uid || (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) && !capable(CAP_CHOWN)) - goto error; + return -EPERM; /* Make sure a caller can chmod. */ if (ia_valid & ATTR_MODE) { if (!is_owner_or_cap(inode)) - goto error; + return -EPERM; /* Also check the setgid bit! */ if (!in_group_p((ia_valid & ATTR_GID) ? 
attr->ia_gid : inode->i_gid) && !capable(CAP_FSETID)) @@ -52,12 +70,10 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr) /* Check for setting the inode time. */ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { if (!is_owner_or_cap(inode)) - goto error; + return -EPERM; } -fine: - retval = 0; -error: - return retval; + + return 0; } EXPORT_SYMBOL(inode_change_ok); @@ -113,7 +129,7 @@ EXPORT_SYMBOL(inode_newsize_ok); * * setattr_copy updates the inode's metadata with that specified * in attr. Noticably missing is inode size update, which is more complex - * as it requires pagecache updates. See simple_setsize. + * as it requires pagecache updates. * * The inode is not marked as dirty after this operation. The rationale is * that for "simple" filesystems, the struct inode is the inode storage. diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 31ef5252f0f..82900b063b1 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -804,10 +804,20 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, size_t num_zeros = (PAGE_CACHE_SIZE - (ia->ia_size & ~PAGE_CACHE_MASK)); + + /* + * XXX(truncate) this should really happen at the beginning + * of ->setattr. But the code is too messy to do that as part + * of a larger patch. ecryptfs is also totally missing out + * on the inode_change_ok check at the beginning of + * ->setattr which would include this.
+ */ + rc = inode_newsize_ok(inode, ia->ia_size); + if (rc) + goto out; + if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { - rc = simple_setsize(inode, ia->ia_size); - if (rc) - goto out; + truncate_setsize(inode, ia->ia_size); lower_ia->ia_size = ia->ia_size; lower_ia->ia_valid |= ATTR_SIZE; goto out; @@ -830,7 +840,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, goto out; } } - simple_setsize(inode, ia->ia_size); + truncate_setsize(inode, ia->ia_size); rc = ecryptfs_write_inode_size_to_metadata(inode); if (rc) { printk(KERN_ERR "Problem with " diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 7dee7b3f368..069620b30d4 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1156,15 +1156,10 @@ static void ext2_truncate_blocks(struct inode *inode, loff_t offset) __ext2_truncate_blocks(inode, offset); } -int ext2_setsize(struct inode *inode, loff_t newsize) +static int ext2_setsize(struct inode *inode, loff_t newsize) { - loff_t oldsize; int error; - error = inode_newsize_ok(inode, newsize); - if (error) - return error; - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return -EINVAL; @@ -1184,10 +1179,7 @@ int ext2_setsize(struct inode *inode, loff_t newsize) if (error) return error; - oldsize = inode->i_size; - i_size_write(inode, newsize); - truncate_pagecache(inode, oldsize, newsize); - + truncate_setsize(inode, newsize); __ext2_truncate_blocks(inode, newsize); inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 27ac2572595..d75a77f85c2 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -306,7 +306,6 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd, extern const struct file_operations fat_file_operations; extern const struct inode_operations fat_file_inode_operations; extern int fat_setattr(struct dentry * dentry, struct iattr * attr); -extern int fat_setsize(struct inode *inode, loff_t offset); extern void fat_truncate_blocks(struct inode 
*inode, loff_t offset); extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/fat/file.c b/fs/fat/file.c index b2eedcee751..7257752b6d5 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -364,18 +364,6 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) return 0; } -int fat_setsize(struct inode *inode, loff_t offset) -{ - int error; - - error = simple_setsize(inode, offset); - if (error) - return error; - fat_truncate_blocks(inode, offset); - - return error; -} - #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) /* valid file mode bits */ #define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) @@ -441,9 +429,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_SIZE) { - error = fat_setsize(inode, attr->ia_size); - if (error) - goto out; + truncate_setsize(inode, attr->ia_size); + fat_truncate_blocks(inode, attr->ia_size); } setattr_copy(inode, attr); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 43a9b3730a9..3978a42d4f0 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1280,12 +1280,8 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) return 0; - if (attr->ia_valid & ATTR_SIZE) { - err = inode_newsize_ok(inode, attr->ia_size); - if (err) - return err; + if (attr->ia_valid & ATTR_SIZE) is_truncate = true; - } req = fuse_get_req(fc); if (IS_ERR(req)) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 703000d6e4d..54fe087bf54 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -702,12 +702,12 @@ out: page_cache_release(page); /* - * XXX(hch): the call below should probably be replaced with + * XXX(truncate): the call below should probably be replaced with * a call to the gfs2-specific truncate blocks helper to actually * release disk blocks.. 
*/ if (pos + len > ip->i_inode.i_size) - simple_setsize(&ip->i_inode, ip->i_inode.i_size); + truncate_setsize(&ip->i_inode, ip->i_inode.i_size); out_endtrans: gfs2_trans_end(sdp); out_trans_fail: diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index d7d410a4ca4..1009be2c973 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -1072,7 +1072,7 @@ int gfs2_permission(struct inode *inode, int mask) } /* - * XXX: should be changed to have proper ordering by opencoding simple_setsize + * XXX(truncate): the truncate_setsize calls should be moved to the end. */ static int setattr_size(struct inode *inode, struct iattr *attr) { @@ -1084,10 +1084,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr) error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); if (error) return error; - error = simple_setsize(inode, attr->ia_size); + truncate_setsize(inode, attr->ia_size); gfs2_trans_end(sdp); - if (error) - return error; } error = gfs2_truncatei(ip, attr->ia_size); diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 459d39d1ea0..1b2426604fe 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -169,13 +169,13 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) mutex_unlock(&f->sem); jffs2_complete_reservation(c); - /* We have to do the simple_setsize() without f->sem held, since + /* We have to do the truncate_setsize() without f->sem held, since some pages may be locked and waiting for it in readpage(). We are protected from a simultaneous write() extending i_size back past iattr->ia_size, because do_truncate() holds the generic inode semaphore. 
*/ if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) { - simple_setsize(inode, iattr->ia_size); + truncate_setsize(inode, iattr->ia_size); inode->i_blocks = (inode->i_size + 511) >> 9; } diff --git a/fs/libfs.c b/fs/libfs.c index 40562224b71..0a9da95317f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -326,49 +326,6 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, return 0; } -/** - * simple_setsize - handle core mm and vfs requirements for file size change - * @inode: inode - * @newsize: new file size - * - * Returns 0 on success, -error on failure. - * - * simple_setsize must be called with inode_mutex held. - * - * simple_setsize will check that the requested new size is OK (see - * inode_newsize_ok), and then will perform the necessary i_size update - * and pagecache truncation (if necessary). It will be typically be called - * from the filesystem's setattr function when ATTR_SIZE is passed in. - * - * The inode itself must have correct permissions and attributes to allow - * i_size to be changed, this function then just checks that the new size - * requested is valid. - * - * In the case of simple in-memory filesystems with inodes stored solely - * in the inode cache, and file data in the pagecache, nothing more needs - * to be done to satisfy a truncate request. Filesystems with on-disk - * blocks for example will need to free them in the case of truncate, in - * that case it may be easier not to use simple_setsize (but each of its - * components will likely be required at some point to update pagecache - * and inode etc). 
- */ -int simple_setsize(struct inode *inode, loff_t newsize) -{ - loff_t oldsize; - int error; - - error = inode_newsize_ok(inode, newsize); - if (error) - return error; - - oldsize = inode->i_size; - i_size_write(inode, newsize); - truncate_pagecache(inode, oldsize, newsize); - - return error; -} -EXPORT_SYMBOL(simple_setsize); - /** * simple_setattr - setattr for simple filesystem * @dentry: dentry @@ -394,12 +351,8 @@ int simple_setattr(struct dentry *dentry, struct iattr *iattr) if (error) return error; - if (iattr->ia_valid & ATTR_SIZE) { - error = simple_setsize(inode, iattr->ia_size); - if (error) - return error; - } - + if (iattr->ia_valid & ATTR_SIZE) + truncate_setsize(inode, iattr->ia_size); setattr_copy(inode, iattr); mark_inode_dirty(inode); return 0; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 584cf8ac167..81296b4e364 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1233,7 +1233,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) } /* - * This will intentionally not wind up calling simple_setsize(), + * This will intentionally not wind up calling truncate_setsize(), * since all the work for a size change has been done above. * Otherwise, we could get into problems with truncate as * ip_alloc_sem is used there to protect against i_size @@ -2308,12 +2308,12 @@ relock: * blocks outside i_size. Trim these off again. * Don't need i_size_read because we hold i_mutex. * - * XXX(hch): this looks buggy because ocfs2 did not + * XXX(truncate): this looks buggy because ocfs2 did not * actually implement ->truncate. 
Take a look at * the new truncate sequence and update this accordingly */ if (*ppos + count > inode->i_size) - simple_setsize(inode, inode->i_size); + truncate_setsize(inode, inode->i_size); ret = written; goto out_dio; } diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 8d44f0347b2..9eead2c796b 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -146,9 +146,8 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) return ret; } - ret = simple_setsize(inode, newsize); - - return ret; + truncate_setsize(inode, newsize); + return 0; } /*****************************************************************************/ diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 9551cb6f7fe..e338f0a5a70 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -714,9 +714,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr) error = server->ops->truncate(inode, attr->ia_size); if (error) goto out; - error = simple_setsize(inode, attr->ia_size); - if (error) - goto out; + truncate_setsize(inode, attr->ia_size); refresh = 1; } diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 12f445cee9f..03ae894c45d 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -967,14 +967,15 @@ static int do_writepage(struct page *page, int len) * the page locked, and it locks @ui_mutex. However, write-back does take inode * @i_mutex, which means other VFS operations may be run on this inode at the * same time. And the problematic one is truncation to smaller size, from where - * we have to call 'simple_setsize()', which first changes @inode->i_size, then + * we have to call 'truncate_setsize()', which first changes @inode->i_size, then * drops the truncated pages. And while dropping the pages, it takes the page - * lock. This means that 'do_truncation()' cannot call 'simple_setsize()' with + * lock. 
This means that 'do_truncation()' cannot call 'truncate_setsize()' with * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This * means that @inode->i_size is changed while @ui_mutex is unlocked. * - * XXX: with the new truncate the above is not true anymore, the simple_setsize - * calls can be replaced with the individual components. + * XXX(truncate): with the new truncate sequence this is not true anymore, + * and the calls to truncate_setsize can be move around freely. They should + * be moved to the very end of the truncate sequence. * * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond * inode size. How do we do this if @inode->i_size may became smaller while we @@ -1128,9 +1129,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, budgeted = 0; } - err = simple_setsize(inode, new_size); - if (err) - goto out_budg; + truncate_setsize(inode, new_size); if (offset) { pgoff_t index = new_size >> PAGE_CACHE_SHIFT; @@ -1217,16 +1216,14 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, if (attr->ia_valid & ATTR_SIZE) { dbg_gen("size %lld -> %lld", inode->i_size, new_size); - err = simple_setsize(inode, new_size); - if (err) - goto out; + truncate_setsize(inode, new_size); } mutex_lock(&ui->ui_mutex); if (attr->ia_valid & ATTR_SIZE) { /* Truncation changes inode [mc]time */ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); - /* 'simple_setsize()' changed @i_size, update @ui_size */ + /* 'truncate_setsize()' changed @i_size, update @ui_size */ ui->ui_size = inode->i_size; } @@ -1248,10 +1245,6 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, if (IS_SYNC(inode)) err = inode->i_sb->s_op->write_inode(inode, NULL); return err; - -out: - ubifs_release_budget(c, &req); - return err; } int ubifs_setattr(struct dentry *dentry, struct iattr *attr) diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 04310878f44..0c9876b396d 100644 --- a/fs/ubifs/ubifs.h +++ 
b/fs/ubifs/ubifs.h @@ -379,7 +379,7 @@ struct ubifs_gced_idx_leb { * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot * make sure @inode->i_size is always changed under @ui_mutex, because it - * cannot call 'simple_setsize()' with @ui_mutex locked, because it would deadlock + * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock * with 'ubifs_writepage()' (see file.c). All the other inode fields are * changed under @ui_mutex, so they do not need "shadow" fields. Note, one * could consider to rework locking and base it on "shadow" fields. diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 085e11623b7..34d5cb13532 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -500,11 +500,6 @@ out: return err; } -/* - * TODO: - * - truncate case should use proper ordering instead of using - * simple_setsize - */ int ufs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; @@ -518,9 +513,9 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr) if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { loff_t old_i_size = inode->i_size; - error = simple_setsize(inode, attr->ia_size); - if (error) - return error; + /* XXX(truncate): truncate_setsize should be called last */ + truncate_setsize(inode, attr->ia_size); + error = ufs_truncate(inode, old_i_size); if (error) return error; diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ecb83c00a6..5547b1b027d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2355,7 +2355,6 @@ extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -extern int simple_setsize(struct inode *, loff_t); extern int 
noop_fsync(struct file *, int); extern int simple_empty(struct dentry *); extern int simple_readpage(struct file *file, struct page *page); diff --git a/include/linux/mm.h b/include/linux/mm.h index a2b48041b91..980164ea10e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -815,6 +815,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, } extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); +extern void truncate_setsize(struct inode *inode, loff_t newsize); extern int vmtruncate(struct inode *inode, loff_t offset); extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end); diff --git a/mm/shmem.c b/mm/shmem.c index 0a43505eeae..33222ba256f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -805,11 +805,10 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) } } - error = simple_setsize(inode, newsize); + /* XXX(truncate): truncate_setsize should be called last */ + truncate_setsize(inode, newsize); if (page) page_cache_release(page); - if (error) - return error; shmem_truncate_range(inode, newsize, (loff_t)-1); } diff --git a/mm/truncate.c b/mm/truncate.c index 937571b8b23..ba887bff48c 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -540,29 +540,49 @@ void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) } EXPORT_SYMBOL(truncate_pagecache); +/** + * truncate_setsize - update inode and pagecache for a new file size + * @inode: inode + * @newsize: new file size + * + * truncate_setsize updates i_size and performs pagecache + * truncation (if necessary) for a file size update. It will + * typically be called from the filesystem's setattr function when + * ATTR_SIZE is passed in. + * + * Must be called with inode_mutex held and after all filesystem + * specific block truncation has been performed.
+ */ +void truncate_setsize(struct inode *inode, loff_t newsize) +{ + loff_t oldsize; + + oldsize = inode->i_size; + i_size_write(inode, newsize); + + truncate_pagecache(inode, oldsize, newsize); +} +EXPORT_SYMBOL(truncate_setsize); + /** * vmtruncate - unmap mappings "freed" by truncate() syscall * @inode: inode of the file used * @offset: file offset to start truncating * - * NOTE! We have to be ready to update the memory sharing - * between the file and the memory map for a potential last - * incomplete page. Ugly, but necessary. - * - * This function is deprecated and simple_setsize or truncate_pagecache - * should be used instead. + * This function is deprecated and truncate_setsize or truncate_pagecache + * should be used instead, together with filesystem specific block truncation. */ int vmtruncate(struct inode *inode, loff_t offset) { int error; - error = simple_setsize(inode, offset); + error = inode_newsize_ok(inode, offset); if (error) return error; + truncate_setsize(inode, offset); if (inode->i_op->truncate) inode->i_op->truncate(inode); - - return error; + return 0; } EXPORT_SYMBOL(vmtruncate); -- cgit v1.2.3-70-g09d2 From 1e2317350971c8b01e6adddc798a00e9bcc1a440 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Jun 2010 09:29:20 +0200 Subject: update documentation for the new truncate sequence Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- Documentation/filesystems/porting | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index a7e9746ee7e..f9547a5c187 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -273,3 +273,21 @@ it's safe to remove it. If you don't need it, remove it. deliberate; as soon as struct block_device * is propagated in a reasonable way by that code fixing will become trivial; until then nothing can be done. 
+ +[mandatory] + + block truncation on error exit from ->write_begin, and ->direct_IO +moved from generic methods (block_write_begin, cont_write_begin, +nobh_write_begin, blockdev_direct_IO*) to callers. Take a look at +ext2_write_failed and callers for an example. + +[mandatory] + + ->truncate is going away. The whole truncate sequence needs to be +implemented in ->setattr, which is now mandatory for filesystems +implementing on-disk size changes. Start with a copy of the old inode_setattr +and vmtruncate, and then reorder the vmtruncate + foofs_vmtruncate sequence to +be in order of zeroing blocks using block_truncate_page or similar helpers, +size update and finally on-disk truncation which should not fail. +inode_change_ok now includes the size checks for ATTR_SIZE and must be called +in the beginning of ->setattr unconditionally. -- cgit v1.2.3-70-g09d2 From 41cce647f8dbe26941bed2158fad0839aab7a294 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Jun 2010 13:24:56 -0400 Subject: jffs2: don't open-code iget_failed() Signed-off-by: Al Viro --- fs/jffs2/dir.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 166062a6823..5fd3b5cecda 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -232,9 +232,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode, return 0; fail: - make_bad_inode(inode); - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); jffs2_free_raw_inode(ri); return ret; } @@ -454,9 +452,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char return 0; fail: - make_bad_inode(inode); - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ret; } @@ -601,9 +597,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) return 0; fail: - make_bad_inode(inode); - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ret; } @@ -778,9 +772,7 @@ static int
jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de return 0; fail: - make_bad_inode(inode); - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ret; } -- cgit v1.2.3-70-g09d2 From 2f246fd0f126f3b3c23a4e6b7109350e83356bd6 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 9 Jun 2010 18:23:18 +0300 Subject: exofs: New truncate sequence These changes are crafted based on the similar conversion done to ext2 by Nick Piggin. * Remove the deprecated ->truncate vector. Let exofs_setattr take care of on-disk size updates. * Call truncate_pagecache on the unused pages if write_begin/end fails. * Clean up exofs_delete_inode that did stupid inode writes and updates on an inode that will be removed. * And finally get rid of exofs_get_block. We never had any blocks; it was all for calling nobh_truncate_page. nobh_truncate_page is not actually needed in exofs since the last page is complete and gone, just like all the other pages. There are no partial blocks in exofs. I've tested with this patch, and there are no apparent failures, so far. 
CC: Nick Piggin CC: Christoph Hellwig Signed-off-by: Boaz Harrosh Signed-off-by: Al Viro --- fs/exofs/exofs.h | 1 - fs/exofs/file.c | 1 - fs/exofs/inode.c | 115 ++++++++++++++++++++----------------------------------- 3 files changed, 42 insertions(+), 75 deletions(-) diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 22721b2fd89..0706ce996c8 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -256,7 +256,6 @@ static inline int exofs_oi_read(struct exofs_i_info *oi, } /* inode.c */ -void exofs_truncate(struct inode *inode); int exofs_setattr(struct dentry *, struct iattr *); int exofs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, diff --git a/fs/exofs/file.c b/fs/exofs/file.c index fef6899be39..f9bfe2b501d 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c @@ -86,6 +86,5 @@ const struct file_operations exofs_file_operations = { }; const struct inode_operations exofs_file_inode_operations = { - .truncate = exofs_truncate, .setattr = exofs_setattr, }; diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 4bfc1f4fd01..ccd0ce3eea7 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -697,6 +697,13 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc) return write_exec(&pcol); } +/* i_mutex held using inode->i_size directly */ +static void _write_failed(struct inode *inode, loff_t to) +{ + if (to > inode->i_size) + truncate_pagecache(inode, to, inode->i_size); +} + int exofs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -710,7 +717,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, fsdata); if (ret) { EXOFS_DBGMSG("simple_write_begin faild\n"); - return ret; + goto out; } page = *pagep; @@ -725,6 +732,9 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, EXOFS_DBGMSG("__readpage_filler faild\n"); } } +out: + if (unlikely(ret)) + 
_write_failed(mapping->host, pos + len); return ret; } @@ -750,6 +760,10 @@ static int exofs_write_end(struct file *file, struct address_space *mapping, int ret; ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata); + if (unlikely(ret)) + _write_failed(inode, pos + len); + + /* TODO: once simple_write_end marks inode dirty remove */ if (i_size != inode->i_size) mark_inode_dirty(inode); return ret; @@ -808,91 +822,49 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode) return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); } -/* - * get_block_t - Fill in a buffer_head - * An OSD takes care of block allocation so we just fake an allocation by - * putting in the inode's sector_t in the buffer_head. - * TODO: What about the case of create==0 and @iblock does not exist in the - * object? - */ -static int exofs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - map_bh(bh_result, inode->i_sb, iblock); - return 0; -} - const struct osd_attr g_attr_logical_length = ATTR_DEF( OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); -static int _do_truncate(struct inode *inode) +static int _do_truncate(struct inode *inode, loff_t newsize) { struct exofs_i_info *oi = exofs_i(inode); - loff_t isize = i_size_read(inode); int ret; inode->i_mtime = inode->i_ctime = CURRENT_TIME; - nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); + ret = exofs_oi_truncate(oi, (u64)newsize); + if (likely(!ret)) + truncate_setsize(inode, newsize); - ret = exofs_oi_truncate(oi, (u64)isize); - EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize); + EXOFS_DBGMSG("(0x%lx) size=0x%llx ret=>%d\n", + inode->i_ino, newsize, ret); return ret; } /* - * Truncate a file to the specified size - all we have to do is set the size - * attribute. We make sure the object exists first. 
- */ -void exofs_truncate(struct inode *inode) -{ - struct exofs_i_info *oi = exofs_i(inode); - int ret; - - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) - || S_ISLNK(inode->i_mode))) - return; - if (exofs_inode_is_fast_symlink(inode)) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - - /* if we are about to truncate an object, and it hasn't been - * created yet, wait - */ - if (unlikely(wait_obj_created(oi))) - goto fail; - - ret = _do_truncate(inode); - if (ret) - goto fail; - -out: - mark_inode_dirty(inode); - return; -fail: - make_bad_inode(inode); - goto out; -} - -/* - * Set inode attributes - just call generic functions. + * Set inode attributes - update size attribute on OSD if needed, + * otherwise just call generic functions. */ int exofs_setattr(struct dentry *dentry, struct iattr *iattr) { struct inode *inode = dentry->d_inode; int error; + /* if we are about to modify an object, and it hasn't been + * created yet, wait + */ + error = wait_obj_created(exofs_i(inode)); + if (unlikely(error)) + return error; + error = inode_change_ok(inode, iattr); - if (error) + if (unlikely(error)) return error; if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { - int error; - - error = vmtruncate(inode, iattr->ia_size); - if (error) + error = _do_truncate(inode, iattr->ia_size); + if (unlikely(error)) return error; } @@ -1345,28 +1317,25 @@ void exofs_delete_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); + /* TODO: should do better here */ if (is_bad_inode(inode)) goto no_delete; - mark_inode_dirty(inode); - exofs_update_inode(inode, inode_needs_sync(inode)); - inode->i_size = 0; - if (inode->i_blocks) - exofs_truncate(inode); - clear_inode(inode); - ret = exofs_get_io_state(&sbi->layout, &ios); - if (unlikely(ret)) { - EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); - return; - } - /* if we are deleting an obj that hasn't been created yet, wait */ if (!obj_created(oi)) { 
BUG_ON(!obj_2bcreated(oi)); wait_event(oi->i_wq, obj_created(oi)); + /* ignore the error attempt a remove anyway */ + } + + /* Now Remove the OSD objects */ + ret = exofs_get_io_state(&sbi->layout, &ios); + if (unlikely(ret)) { + EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); + return; } ios->obj.id = exofs_oi_objno(oi); -- cgit v1.2.3-70-g09d2 From fa9b227e9019ebaeeb06224ba531a490f91144b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Jun 2010 05:17:31 -0400 Subject: xfs: new truncate sequence Convert XFS to the new truncate sequence. We still can have errors after updating the file size in xfs_setattr, but these are real I/O errors and lead to a transaction abort and filesystem shutdown, so they are not an issue. Errors from ->write_begin and write_end can now be handled correctly because we can actually get rid of the delalloc extents while previously the buffer state was stripped in block_invalidatepage. There is still no error handling for ->direct_IO, because doing so will need some major restructuring given that we only have the iolock shared and do not hold i_mutex at all. Fortunately leaving the normally allocated blocks behind there is not a major issue and this will get cleaned up by xfs_free_eofblock later. Note: the patch is against Al's vfs.git tree as that contains the necessary preparations. I'd prefer to get it applied there so that we can get some testing in linux-next. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/xfs/linux-2.6/xfs_aops.c | 42 ++++++++++++++++++++++++++++++++++++------ fs/xfs/linux-2.6/xfs_iops.c | 16 ---------------- fs/xfs/linux-2.6/xfs_linux.h | 2 -- fs/xfs/xfs_vnodeops.c | 38 ++++++++++++++++++++------------------ 4 files changed, 56 insertions(+), 42 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index bf7aad0d78b..15412fe15c3 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1494,6 +1494,22 @@ xfs_vm_direct_IO( return ret; } +STATIC void +xfs_vm_write_failed( + struct address_space *mapping, + loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + struct iattr ia = { + .ia_valid = ATTR_SIZE | ATTR_FORCE, + .ia_size = inode->i_size, + }; + xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK); + } +} + STATIC int xfs_vm_write_begin( struct file *file, @@ -1508,12 +1524,26 @@ xfs_vm_write_begin( ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS, pagep, xfs_get_blocks); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } + if (unlikely(ret)) + xfs_vm_write_failed(mapping, pos + len); + return ret; +} + +STATIC int +xfs_vm_write_end( + struct file *file, + struct address_space *mapping, + loff_t pos, + unsigned len, + unsigned copied, + struct page *page, + void *fsdata) +{ + int ret; + ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); + if (unlikely(ret < len)) + xfs_vm_write_failed(mapping, pos + len); return ret; } @@ -1559,7 +1589,7 @@ const struct address_space_operations xfs_address_space_operations = { .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .write_begin = xfs_vm_write_begin, - .write_end = generic_write_end, + .write_end = xfs_vm_write_end, .bmap = xfs_vm_bmap, .direct_IO = xfs_vm_direct_IO, .migratepage = buffer_migrate_page, diff --git 
a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 536b81e63a3..62dd349face 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -540,21 +540,6 @@ xfs_vn_setattr( return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); } -/* - * block_truncate_page can return an error, but we can't propagate it - * at all here. Leave a complaint + stack trace in the syslog because - * this could be bad. If it is bad, we need to propagate the error further. - */ -STATIC void -xfs_vn_truncate( - struct inode *inode) -{ - int error; - error = block_truncate_page(inode->i_mapping, inode->i_size, - xfs_get_blocks); - WARN_ON(error); -} - STATIC long xfs_vn_fallocate( struct inode *inode, @@ -694,7 +679,6 @@ xfs_vn_fiemap( static const struct inode_operations xfs_inode_operations = { .check_acl = xfs_check_acl, - .truncate = xfs_vn_truncate, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 998a9d7fb9c..2fa0bd9ebc7 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -156,8 +156,6 @@ */ #define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) #define xfs_stack_trace() dump_stack() -#define xfs_itruncate_data(ip, off) \ - (-vmtruncate(VFS_I(ip), (off))) /* Move the kernel do_div definition off to one side */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 3ac137dd531..66d585c6917 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -221,8 +221,11 @@ xfs_setattr( * transaction to modify the i_size. */ code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); + if (code) + goto error_return; } xfs_iunlock(ip, XFS_ILOCK_EXCL); + lock_flags &= ~XFS_ILOCK_EXCL; /* * We are going to log the inode size change in this @@ -236,36 +239,35 @@ xfs_setattr( * really care about here and prevents waiting for other data * not within the range we care about here. 
*/ - if (!code && - ip->i_size != ip->i_d.di_size && + if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { code = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, XBF_ASYNC, FI_NONE); + if (code) + goto error_return; } /* wait for all I/O to complete */ xfs_ioend_wait(ip); - if (!code) - code = xfs_itruncate_data(ip, iattr->ia_size); - if (code) { - ASSERT(tp == NULL); - lock_flags &= ~XFS_ILOCK_EXCL; - ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock); + code = -block_truncate_page(inode->i_mapping, iattr->ia_size, + xfs_get_blocks); + if (code) goto error_return; - } + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); - if ((code = xfs_trans_reserve(tp, 0, - XFS_ITRUNCATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_ITRUNCATE_LOG_COUNT))) { - xfs_trans_cancel(tp, 0); - if (need_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return code; - } + code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_ITRUNCATE_LOG_COUNT); + if (code) + goto error_return; + + truncate_setsize(inode, iattr->ia_size); + commit_flags = XFS_TRANS_RELEASE_LOG_RES; + lock_flags |= XFS_ILOCK_EXCL; + xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip); -- cgit v1.2.3-70-g09d2 From b5fc510c48f631882ccec3c0f02a25d5b67de09f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 Jul 2010 12:24:09 +0400 Subject: get rid of file_fsync() Copy and simplify in the only two users remaining. 
Signed-off-by: Al Viro --- fs/hfs/inode.c | 26 +++++++++++++++++++++++++- fs/hfsplus/hfsplus_fs.h | 1 + fs/hfsplus/inode.c | 27 ++++++++++++++++++++++++++- fs/hfsplus/super.c | 2 +- fs/sync.c | 25 ------------------------- include/linux/buffer_head.h | 1 - 6 files changed, 53 insertions(+), 29 deletions(-) diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 87de671baa8..93ceec8fbb8 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -625,6 +625,30 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) return 0; } +static int hfs_file_fsync(struct file *filp, int datasync) +{ + struct inode *inode = filp->f_mapping->host; + struct super_block * sb; + int ret, err; + + /* sync the inode to buffers */ + ret = write_inode_now(inode, 0); + + /* sync the superblock to buffers */ + sb = inode->i_sb; + if (sb->s_dirt) { + lock_super(sb); + sb->s_dirt = 0; + if (!(sb->s_flags & MS_RDONLY)) + hfs_mdb_commit(sb); + unlock_super(sb); + } + /* .. finally sync the buffers to disk */ + err = sync_blockdev(sb->s_bdev); + if (!ret) + ret = err; + return ret; +} static const struct file_operations hfs_file_operations = { .llseek = generic_file_llseek, @@ -634,7 +658,7 @@ static const struct file_operations hfs_file_operations = { .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, - .fsync = file_fsync, + .fsync = hfs_file_fsync, .open = hfs_file_open, .release = hfs_file_release, }; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 6505c30ad96..dc856be3c2b 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -351,6 +351,7 @@ int hfsplus_show_options(struct seq_file *, struct vfsmount *); /* super.c */ struct inode *hfsplus_iget(struct super_block *, unsigned long); +int hfsplus_sync_fs(struct super_block *sb, int wait); /* tables.c */ extern u16 hfsplus_case_fold_table[]; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 654c5a8ddf1..c5a979d62c6 100644 --- 
a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -311,6 +311,31 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) return 0; } +static int hfsplus_file_fsync(struct file *filp, int datasync) +{ + struct inode *inode = filp->f_mapping->host; + struct super_block * sb; + int ret, err; + + /* sync the inode to buffers */ + ret = write_inode_now(inode, 0); + + /* sync the superblock to buffers */ + sb = inode->i_sb; + if (sb->s_dirt) { + if (!(sb->s_flags & MS_RDONLY)) + hfsplus_sync_fs(sb, 1); + else + sb->s_dirt = 0; + } + + /* .. finally sync the buffers to disk */ + err = sync_blockdev(sb->s_bdev); + if (!ret) + ret = err; + return ret; +} + static const struct inode_operations hfsplus_file_inode_operations = { .lookup = hfsplus_file_lookup, .truncate = hfsplus_file_truncate, @@ -328,7 +353,7 @@ static const struct file_operations hfsplus_file_operations = { .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, - .fsync = file_fsync, + .fsync = hfsplus_file_fsync, .open = hfsplus_file_open, .release = hfsplus_file_release, .unlocked_ioctl = hfsplus_ioctl, diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 74b473a8ef9..a32c241e4e4 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -154,7 +154,7 @@ static void hfsplus_clear_inode(struct inode *inode) } } -static int hfsplus_sync_fs(struct super_block *sb, int wait) +int hfsplus_sync_fs(struct super_block *sb, int wait) { struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; diff --git a/fs/sync.c b/fs/sync.c index 15aa6f03b2d..ba76b9623e7 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -128,31 +128,6 @@ void emergency_sync(void) } } -/* - * Generic function to fsync a file. 
- */ -int file_fsync(struct file *filp, int datasync) -{ - struct inode *inode = filp->f_mapping->host; - struct super_block * sb; - int ret, err; - - /* sync the inode to buffers */ - ret = write_inode_now(inode, 0); - - /* sync the superblock to buffers */ - sb = inode->i_sb; - if (sb->s_dirt && sb->s_op->write_super) - sb->s_op->write_super(sb); - - /* .. finally sync the buffers to disk */ - err = sync_blockdev(sb->s_bdev); - if (!ret) - ret = err; - return ret; -} -EXPORT_SYMBOL(file_fsync); - /** * vfs_fsync_range - helper to sync a range of data & metadata to disk * @file: file to sync diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 3f69054f86d..620f1d1088c 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -225,7 +225,6 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); -int file_fsync(struct file *, int); int nobh_write_begin(struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); int nobh_write_end(struct file *, struct address_space *, -- cgit v1.2.3-70-g09d2 From a4ffdde6e56fdf8c34ddadc2674d6eb978083369 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 2 Jun 2010 17:38:30 -0400 Subject: simplify checks for I_CLEAR/I_FREEING add I_CLEAR instead of replacing I_FREEING with it. I_CLEAR is equivalent to I_FREEING for almost all code looking at either; it's there to keep track of having called clear_inode() exactly once per inode lifetime, at some point after having set I_FREEING. I_CLEAR and I_FREEING never get set at the same time with the current code, so we can switch to setting i_state to I_FREEING | I_CLEAR instead of I_CLEAR without loss of information. 
As the result of such change, checks become simpler and the amount of code that needs to know about I_CLEAR shrinks a lot. Signed-off-by: Al Viro --- fs/btrfs/inode.c | 2 +- fs/drop_caches.c | 2 +- fs/fs-writeback.c | 8 ++++---- fs/gfs2/inode.c | 2 +- fs/inode.c | 16 ++++++++-------- fs/nilfs2/gcdat.c | 2 +- fs/notify/inode_mark.c | 6 +++--- fs/notify/inotify/inotify.c | 7 +++---- fs/quota/dquot.c | 2 +- fs/xfs/linux-2.6/xfs_iops.c | 4 ++-- include/linux/fs.h | 4 ++-- 11 files changed, 27 insertions(+), 28 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7f9e0536db1..95eac011696 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3860,7 +3860,7 @@ again: p = &parent->rb_right; else { WARN_ON(!(entry->vfs_inode.i_state & - (I_WILL_FREE | I_FREEING | I_CLEAR))); + (I_WILL_FREE | I_FREEING))); rb_erase(parent, &root->inode_tree); RB_CLEAR_NODE(parent); spin_unlock(&root->inode_lock); diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 83c4f600786..2195c213ab2 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -18,7 +18,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_lock(&inode_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) continue; if (inode->i_mapping->nrpages == 0) continue; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d5be1693ac9..7608880b5c5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -352,7 +352,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_lock(&inode_lock); inode->i_state &= ~I_SYNC; - if (!(inode->i_state & (I_FREEING | I_CLEAR))) { + if (!(inode->i_state & I_FREEING)) { if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { /* * More pages get dirtied by a fast dirtier. 
@@ -499,7 +499,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, if (inode_dirtied_after(inode, wbc->wb_start)) return 1; - BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); + BUG_ON(inode->i_state & I_FREEING); __iget(inode); pages_skipped = wbc->pages_skipped; writeback_single_inode(inode, wbc); @@ -935,7 +935,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) if (hlist_unhashed(&inode->i_hash)) goto out; } - if (inode->i_state & (I_FREEING|I_CLEAR)) + if (inode->i_state & I_FREEING) goto out; /* @@ -1001,7 +1001,7 @@ static void wait_sb_inodes(struct super_block *sb) list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { struct address_space *mapping; - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) continue; mapping = inode->i_mapping; if (mapping->nrpages == 0) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 6c023a3b5d2..08140f185a3 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -84,7 +84,7 @@ static int iget_skip_test(struct inode *inode, void *opaque) struct gfs2_skip_data *data = opaque; if (ip->i_no_addr == data->no_addr) { - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ + if (inode->i_state & (I_FREEING|I_WILL_FREE)){ data->skipped = 1; return 0; } diff --git a/fs/inode.c b/fs/inode.c index 722860b323a..71fe079ca32 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -317,7 +317,7 @@ void clear_inode(struct inode *inode) bd_forget(inode); if (S_ISCHR(inode->i_mode) && inode->i_cdev) cd_forget(inode); - inode->i_state = I_CLEAR; + inode->i_state = I_FREEING | I_CLEAR; } EXPORT_SYMBOL(clear_inode); @@ -553,7 +553,7 @@ repeat: continue; if (!test(inode, data)) continue; - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { + if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } @@ -578,7 +578,7 @@ repeat: continue; if (inode->i_sb != sb) continue; - if (inode->i_state & 
(I_FREEING|I_CLEAR|I_WILL_FREE)) { + if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } @@ -840,7 +840,7 @@ EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { spin_lock(&inode_lock); - if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))) + if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) __iget(inode); else /* @@ -1089,7 +1089,7 @@ int insert_inode_locked(struct inode *inode) continue; if (old->i_sb != sb) continue; - if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) + if (old->i_state & (I_FREEING|I_WILL_FREE)) continue; break; } @@ -1128,7 +1128,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, continue; if (!test(old, data)) continue; - if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) + if (old->i_state & (I_FREEING|I_WILL_FREE)) continue; break; } @@ -1218,7 +1218,7 @@ void generic_delete_inode(struct inode *inode) hlist_del_init(&inode->i_hash); spin_unlock(&inode_lock); wake_up_inode(inode); - BUG_ON(inode->i_state != I_CLEAR); + BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); destroy_inode(inode); } EXPORT_SYMBOL(generic_delete_inode); @@ -1322,7 +1322,7 @@ static inline void iput_final(struct inode *inode) void iput(struct inode *inode) { if (inode) { - BUG_ON(inode->i_state == I_CLEAR); + BUG_ON(inode->i_state & I_CLEAR); if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) iput_final(inode); diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c index dd5f7e0a95f..84a45d1d546 100644 --- a/fs/nilfs2/gcdat.c +++ b/fs/nilfs2/gcdat.c @@ -78,7 +78,7 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs) struct inode *gcdat = nilfs->ns_gc_dat; struct nilfs_inode_info *gii = NILFS_I(gcdat); - gcdat->i_state = I_CLEAR; + gcdat->i_state = I_FREEING | I_CLEAR; gii->i_flags = 0; nilfs_palloc_clear_cache(gcdat); diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 0399bcbe09c..152b83ec005 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ 
-369,11 +369,11 @@ void fsnotify_unmount_inodes(struct list_head *list) struct inode *need_iput_tmp; /* - * We cannot __iget() an inode in state I_CLEAR, I_FREEING, + * We cannot __iget() an inode in state I_FREEING, * I_WILL_FREE, or I_NEW which is fine because by that point * the inode cannot have any associated watches. */ - if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) continue; /* @@ -397,7 +397,7 @@ void fsnotify_unmount_inodes(struct list_head *list) /* In case the dropping of a reference would nuke next_i. */ if ((&next_i->i_sb_list != list) && atomic_read(&next_i->i_count) && - !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) { + !(next_i->i_state & (I_FREEING | I_WILL_FREE))) { __iget(next_i); need_iput = next_i; } diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c index 27b75ebc746..cf6b0429a25 100644 --- a/fs/notify/inotify/inotify.c +++ b/fs/notify/inotify/inotify.c @@ -377,11 +377,11 @@ void inotify_unmount_inodes(struct list_head *list) struct list_head *watches; /* - * We cannot __iget() an inode in state I_CLEAR, I_FREEING, + * We cannot __iget() an inode in state I_FREEING, * I_WILL_FREE, or I_NEW which is fine because by that point * the inode cannot have any associated watches. */ - if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) continue; /* @@ -403,8 +403,7 @@ void inotify_unmount_inodes(struct list_head *list) /* In case the dropping of a reference would nuke next_i. 
*/ if ((&next_i->i_sb_list != list) && atomic_read(&next_i->i_count) && - !(next_i->i_state & (I_CLEAR | I_FREEING | - I_WILL_FREE))) { + !(next_i->i_state & (I_FREEING|I_WILL_FREE))) { __iget(next_i); need_iput = next_i; } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 437d2ca2de9..5cec3e2348f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -885,7 +885,7 @@ static void add_dquot_ref(struct super_block *sb, int type) spin_lock(&inode_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) continue; #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 62dd349face..68be25dcd30 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -80,7 +80,7 @@ xfs_mark_inode_dirty_sync( { struct inode *inode = VFS_I(ip); - if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) + if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) mark_inode_dirty_sync(inode); } @@ -90,7 +90,7 @@ xfs_mark_inode_dirty( { struct inode *inode = VFS_I(ip); - if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) + if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) mark_inode_dirty(inode); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 5547b1b027d..218693d8d44 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1616,8 +1616,8 @@ struct super_operations { * I_FREEING Set when inode is about to be freed but still has dirty * pages or buffers attached or the inode itself is still * dirty. - * I_CLEAR Set by clear_inode(). In this state the inode is clean - * and can be destroyed. + * I_CLEAR Added by clear_inode(). In this state the inode is clean + * and can be destroyed. Inode keeps I_FREEING. * * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are * prohibited for many purposes. 
iget() must wait for -- cgit v1.2.3-70-g09d2 From b4272d4c810532e1a4dea111433a0af56d3bd2b7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Jun 2010 19:33:20 -0400 Subject: unify fs/inode.c callers of clear_inode() For now, just a straightforward merge Signed-off-by: Al Viro --- fs/inode.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 71fe079ca32..60cb2596976 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -321,6 +321,19 @@ void clear_inode(struct inode *inode) } EXPORT_SYMBOL(clear_inode); +static void evict(struct inode *inode, int delete) +{ + const struct super_operations *op = inode->i_sb->s_op; + + if (delete && op->delete_inode) { + op->delete_inode(inode); + } else { + if (inode->i_data.nrpages) + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); + } +} + /* * dispose_list - dispose of the contents of a local list * @head: the head of the list to free @@ -338,9 +351,7 @@ static void dispose_list(struct list_head *head) inode = list_first_entry(head, struct inode, i_list); list_del(&inode->i_list); - if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); + evict(inode, 0); spin_lock(&inode_lock); hlist_del_init(&inode->i_hash); @@ -1194,8 +1205,6 @@ EXPORT_SYMBOL(remove_inode_hash); */ void generic_delete_inode(struct inode *inode) { - const struct super_operations *op = inode->i_sb->s_op; - list_del_init(&inode->i_list); list_del_init(&inode->i_sb_list); WARN_ON(inode->i_state & I_NEW); @@ -1203,17 +1212,8 @@ void generic_delete_inode(struct inode *inode) inodes_stat.nr_inodes--; spin_unlock(&inode_lock); - if (op->delete_inode) { - void (*delete)(struct inode *) = op->delete_inode; - /* Filesystems implementing their own - * s_op->delete_inode are required to call - * truncate_inode_pages and clear_inode() - * internally */ - delete(inode); - } else { - truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); - } + 
evict(inode, 1); + spin_lock(&inode_lock); hlist_del_init(&inode->i_hash); spin_unlock(&inode_lock); @@ -1268,9 +1268,7 @@ static void generic_forget_inode(struct inode *inode) { if (!generic_detach_inode(inode)) return; - if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); + evict(inode, 0); wake_up_inode(inode); destroy_inode(inode); } -- cgit v1.2.3-70-g09d2 From be7ce4161f9e6bf2497f90337d1214aa6ee06e15 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Jun 2010 19:40:39 -0400 Subject: New method - evict_inode() Hybrid of ->clear_inode() and ->delete_inode(); if present, does all fs work to be done when in-core inode is about to be gone, for whatever reason. Signed-off-by: Al Viro --- fs/inode.c | 4 +++- include/linux/fs.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index 60cb2596976..474a72f571a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -325,7 +325,9 @@ static void evict(struct inode *inode, int delete) { const struct super_operations *op = inode->i_sb->s_op; - if (delete && op->delete_inode) { + if (op->evict_inode) { + op->evict_inode(inode); + } else if (delete && op->delete_inode) { op->delete_inode(inode); } else { if (inode->i_data.nrpages) diff --git a/include/linux/fs.h b/include/linux/fs.h index 218693d8d44..ce50be4b0b4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1563,6 +1563,7 @@ struct super_operations { void (*dirty_inode) (struct inode *); int (*write_inode) (struct inode *, struct writeback_control *wbc); void (*drop_inode) (struct inode *); + void (*evict_inode) (struct inode *); void (*delete_inode) (struct inode *); void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); -- cgit v1.2.3-70-g09d2 From 2bbbda308f5ca027d4fd721f914c0cab88d49aec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Jun 2010 19:52:12 -0400 Subject: switch hugetlbfs to ->evict_inode() The first spoils - hugetlb can use default 
->drop_inode() now. Signed-off-by: Al Viro --- fs/hugetlbfs/inode.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d5f019d48b0..bf1a2f400e7 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -371,29 +371,12 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) hugetlb_unreserve_pages(inode, start, freed); } -static void hugetlbfs_delete_inode(struct inode *inode) +static void hugetlbfs_evict_inode(struct inode *inode) { truncate_hugepages(inode, 0); clear_inode(inode); } -static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) -{ - if (generic_detach_inode(inode)) { - truncate_hugepages(inode, 0); - clear_inode(inode); - destroy_inode(inode); - } -} - -static void hugetlbfs_drop_inode(struct inode *inode) -{ - if (!inode->i_nlink) - generic_delete_inode(inode); - else - hugetlbfs_forget_inode(inode); -} - static inline void hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) { @@ -713,9 +696,8 @@ static const struct inode_operations hugetlbfs_inode_operations = { static const struct super_operations hugetlbfs_ops = { .alloc_inode = hugetlbfs_alloc_inode, .destroy_inode = hugetlbfs_destroy_inode, + .evict_inode = hugetlbfs_evict_inode, .statfs = hugetlbfs_statfs, - .delete_inode = hugetlbfs_delete_inode, - .drop_inode = hugetlbfs_drop_inode, .put_super = hugetlbfs_put_super, .show_options = generic_show_options, }; -- cgit v1.2.3-70-g09d2 From c6287315cb958e740466555ca5e9d007f25b39bd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Jun 2010 19:56:17 -0400 Subject: generic_detach_inode() can be static now Signed-off-by: Al Viro --- fs/inode.c | 3 +-- include/linux/fs.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 474a72f571a..256e620c641 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1234,7 +1234,7 @@ EXPORT_SYMBOL(generic_delete_inode); * * Returns 1 if 
inode should be completely destroyed. */ -int generic_detach_inode(struct inode *inode) +static int generic_detach_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; @@ -1264,7 +1264,6 @@ int generic_detach_inode(struct inode *inode) spin_unlock(&inode_lock); return 1; } -EXPORT_SYMBOL_GPL(generic_detach_inode); static void generic_forget_inode(struct inode *inode) { diff --git a/include/linux/fs.h b/include/linux/fs.h index ce50be4b0b4..e0ecb8e75eb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2167,7 +2167,6 @@ extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); extern void generic_delete_inode(struct inode *inode); extern void generic_drop_inode(struct inode *inode); -extern int generic_detach_inode(struct inode *inode); extern struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), -- cgit v1.2.3-70-g09d2 From 661074e91b1da1ee262dfde6dd836deacccb9def Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Jun 2010 20:19:55 -0400 Subject: Take ->i_bdev/->i_cdev handling out of clear_inode() All call chains to clear_inode() pass through evict_inode() and clear_inode() should be called by evict_inode() exactly once. So we can pull i_bdev/i_cdev detaching up to evict_inode() itself. 
Signed-off-by: Al Viro --- fs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 256e620c641..9aff7deaf81 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -313,10 +313,6 @@ void clear_inode(struct inode *inode) inode_sync_wait(inode); if (inode->i_sb->s_op->clear_inode) inode->i_sb->s_op->clear_inode(inode); - if (S_ISBLK(inode->i_mode) && inode->i_bdev) - bd_forget(inode); - if (S_ISCHR(inode->i_mode) && inode->i_cdev) - cd_forget(inode); inode->i_state = I_FREEING | I_CLEAR; } EXPORT_SYMBOL(clear_inode); @@ -334,6 +330,10 @@ static void evict(struct inode *inode, int delete) truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); } + if (S_ISBLK(inode->i_mode) && inode->i_bdev) + bd_forget(inode); + if (S_ISCHR(inode->i_mode) && inode->i_cdev) + cd_forget(inode); } /* -- cgit v1.2.3-70-g09d2 From b0683aa638b3326c6fc22e5290dfa75e08bd83f5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Jun 2010 20:55:25 -0400 Subject: new helper: end_writeback() Essentially, the minimal variant of ->evict_inode(). It's a trimmed-down clear_inode(), sans any fs callbacks. Once it returns we know that no async writeback will be happening; every ->evict_inode() instance should do that once and do that before doing anything ->write_inode() could interfere with (e.g. freeing the on-disk inode). 
Signed-off-by: Al Viro --- fs/hugetlbfs/inode.c | 2 +- fs/inode.c | 12 ++++++++++++ include/linux/fs.h | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index bf1a2f400e7..6e5bd42f386 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -374,7 +374,7 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) static void hugetlbfs_evict_inode(struct inode *inode) { truncate_hugepages(inode, 0); - clear_inode(inode); + end_writeback(inode); } static inline void diff --git a/fs/inode.c b/fs/inode.c index 9aff7deaf81..93e7a5ecbc2 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -294,6 +294,18 @@ void __iget(struct inode *inode) inodes_stat.nr_unused--; } +void end_writeback(struct inode *inode) +{ + might_sleep(); + BUG_ON(inode->i_data.nrpages); + BUG_ON(!list_empty(&inode->i_data.private_list)); + BUG_ON(!(inode->i_state & I_FREEING)); + BUG_ON(inode->i_state & I_CLEAR); + inode_sync_wait(inode); + inode->i_state = I_FREEING | I_CLEAR; +} +EXPORT_SYMBOL(end_writeback); + /** * clear_inode - clear an inode * @inode: inode to clear diff --git a/include/linux/fs.h b/include/linux/fs.h index e0ecb8e75eb..3c23c1dcb1b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2184,6 +2184,7 @@ extern void unlock_new_inode(struct inode *); extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); +extern void end_writeback(struct inode *); extern void destroy_inode(struct inode *); extern void __destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); -- cgit v1.2.3-70-g09d2 From b69257f2502e046beac7b9a3086fda552eb4d7e9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Jun 2010 21:02:59 -0400 Subject: switch hypfs to ->evict_inode() ... and since we never hash its inodes, default ->drop_inode() will work just fine. 
Signed-off-by: Al Viro --- arch/s390/hypfs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 6b120f07304..98a4a4c267a 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -117,10 +117,10 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode) return ret; } -static void hypfs_drop_inode(struct inode *inode) +static void hypfs_evict_inode(struct inode *inode) { + end_writeback(inode); kfree(inode->i_private); - generic_delete_inode(inode); } static int hypfs_open(struct inode *inode, struct file *filp) @@ -460,7 +460,7 @@ static struct file_system_type hypfs_type = { static const struct super_operations hypfs_s_ops = { .statfs = simple_statfs, - .drop_inode = hypfs_drop_inode, + .evict_inode = hypfs_evict_inode, .show_options = hypfs_show_options, }; -- cgit v1.2.3-70-g09d2 From 77b8a75f5bb461951148a7211ef30eecac5cb662 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Jun 2010 21:19:01 -0400 Subject: simplify get_cramfs_inode() simply don't hash the inodes that don't have real inumber instead of skipping them during iget5_locked(); as the result, simple iget_locked() would do and we can get rid of cramfs ->drop_inode() as well. 
Signed-off-by: Al Viro --- fs/cramfs/inode.c | 88 ++++++++++++++++++++++++------------------------------- 1 file changed, 38 insertions(+), 50 deletions(-) diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index dd3634e4c96..a53b130b366 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -39,66 +39,55 @@ static DEFINE_MUTEX(read_mutex); #define CRAMINO(x) (((x)->offset && (x)->size)?(x)->offset<<2:1) #define OFFSET(x) ((x)->i_ino) - -static int cramfs_iget5_test(struct inode *inode, void *opaque) -{ - struct cramfs_inode *cramfs_inode = opaque; - return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1; -} - -static int cramfs_iget5_set(struct inode *inode, void *opaque) +static void setup_inode(struct inode *inode, struct cramfs_inode * cramfs_inode) { - struct cramfs_inode *cramfs_inode = opaque; - inode->i_ino = CRAMINO(cramfs_inode); - return 0; + static struct timespec zerotime; + inode->i_mode = cramfs_inode->mode; + inode->i_uid = cramfs_inode->uid; + inode->i_size = cramfs_inode->size; + inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; + inode->i_gid = cramfs_inode->gid; + /* Struct copy intentional */ + inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; + /* inode->i_nlink is left 1 - arguably wrong for directories, + but it's the best we can do without reading the directory + contents. 1 yields the right result in GNU find, even + without -noleaf option. 
*/ + if (S_ISREG(inode->i_mode)) { + inode->i_fop = &generic_ro_fops; + inode->i_data.a_ops = &cramfs_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &cramfs_dir_inode_operations; + inode->i_fop = &cramfs_directory_operations; + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = &page_symlink_inode_operations; + inode->i_data.a_ops = &cramfs_aops; + } else { + init_special_inode(inode, inode->i_mode, + old_decode_dev(cramfs_inode->size)); + } } static struct inode *get_cramfs_inode(struct super_block *sb, struct cramfs_inode * cramfs_inode) { - struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), - cramfs_iget5_test, cramfs_iget5_set, - cramfs_inode); - static struct timespec zerotime; - - if (inode && (inode->i_state & I_NEW)) { - inode->i_mode = cramfs_inode->mode; - inode->i_uid = cramfs_inode->uid; - inode->i_size = cramfs_inode->size; - inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; - inode->i_gid = cramfs_inode->gid; - /* Struct copy intentional */ - inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; - /* inode->i_nlink is left 1 - arguably wrong for directories, - but it's the best we can do without reading the directory - contents. 1 yields the right result in GNU find, even - without -noleaf option. 
*/ - if (S_ISREG(inode->i_mode)) { - inode->i_fop = &generic_ro_fops; - inode->i_data.a_ops = &cramfs_aops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &cramfs_dir_inode_operations; - inode->i_fop = &cramfs_directory_operations; - } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &page_symlink_inode_operations; - inode->i_data.a_ops = &cramfs_aops; - } else { - init_special_inode(inode, inode->i_mode, - old_decode_dev(cramfs_inode->size)); + struct inode *inode; + if (CRAMINO(cramfs_inode) == 1) { + inode = new_inode(sb); + if (inode) { + inode->i_ino = 1; + setup_inode(inode, cramfs_inode); + } + } else { + inode = iget_locked(sb, CRAMINO(cramfs_inode)); + if (inode) { + setup_inode(inode, cramfs_inode); + unlock_new_inode(inode); } - unlock_new_inode(inode); } return inode; } -static void cramfs_drop_inode(struct inode *inode) -{ - if (inode->i_ino == 1) - generic_delete_inode(inode); - else - generic_drop_inode(inode); -} - /* * We have our own block cache: don't fill up the buffer cache * with the rom-image, because the way the filesystem is set @@ -542,7 +531,6 @@ static const struct super_operations cramfs_ops = { .put_super = cramfs_put_super, .remount_fs = cramfs_remount, .statfs = cramfs_statfs, - .drop_inode = cramfs_drop_inode, }; static int cramfs_get_sb(struct file_system_type *fs_type, -- cgit v1.2.3-70-g09d2 From 8267952b362b67a5cb5371d6894a772a13e6874c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Jun 2010 22:17:56 -0400 Subject: switch procfs to ->evict_inode() Signed-off-by: Al Viro --- fs/proc/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index aea8502e58a..23561cda724 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -25,11 +25,12 @@ #include "internal.h" -static void proc_delete_inode(struct inode *inode) +static void proc_evict_inode(struct inode *inode) { struct proc_dir_entry *de; truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); /* 
Stop tracking associated processes */ put_pid(PROC_I(inode)->pid); @@ -40,7 +41,6 @@ static void proc_delete_inode(struct inode *inode) pde_put(de); if (PROC_I(inode)->sysctl) sysctl_head_put(PROC_I(inode)->sysctl); - clear_inode(inode); } struct vfsmount *proc_mnt; @@ -91,7 +91,7 @@ static const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, .drop_inode = generic_delete_inode, - .delete_inode = proc_delete_inode, + .evict_inode = proc_evict_inode, .statfs = simple_statfs, }; -- cgit v1.2.3-70-g09d2 From 01cd9fef6eb3caae06415861de5b53224b722549 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Jun 2010 22:21:54 -0400 Subject: switch sysfs to ->evict_inode() Signed-off-by: Al Viro --- fs/sysfs/inode.c | 6 +++--- fs/sysfs/mount.c | 2 +- fs/sysfs/sysfs.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 7e187fbd3d4..cffb1fd8ba3 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -312,15 +312,15 @@ struct inode * sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd) * The sysfs_dirent serves as both an inode and a directory entry for sysfs. * To prevent the sysfs inode numbers from being freed prematurely we take a * reference to sysfs_dirent from the sysfs inode. A - * super_operations.delete_inode() implementation is needed to drop that + * super_operations.evict_inode() implementation is needed to drop that * reference upon inode destruction. 
*/ -void sysfs_delete_inode(struct inode *inode) +void sysfs_evict_inode(struct inode *inode) { struct sysfs_dirent *sd = inode->i_private; truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); + end_writeback(inode); sysfs_put(sd); } diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 281c0c9bc39..f2af22574c5 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -29,7 +29,7 @@ struct kmem_cache *sysfs_dir_cachep; static const struct super_operations sysfs_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, - .delete_inode = sysfs_delete_inode, + .evict_inode = sysfs_evict_inode, }; struct sysfs_dirent sysfs_root = { diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 6a13105b559..d9be60a2e95 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -198,7 +198,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd) * inode.c */ struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); -void sysfs_delete_inode(struct inode *inode); +void sysfs_evict_inode(struct inode *inode); int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); int sysfs_permission(struct inode *inode, int mask); int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); -- cgit v1.2.3-70-g09d2 From 5ccb4a78d8c0e27985afec32cc4894d48e7b876e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Jun 2010 22:27:38 -0400 Subject: switch minix to ->evict_inode(), fix write_inode/delete_inode race We need to wait for completion of possible writeback in progress before we clear on-disk inode during deletion. 
Signed-off-by: Al Viro --- fs/minix/bitmap.c | 6 ++---- fs/minix/inode.c | 15 ++++++++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 482779fe4e7..3f32bcb0d9b 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -200,13 +200,13 @@ void minix_free_inode(struct inode * inode) ino = inode->i_ino; if (ino < 1 || ino > sbi->s_ninodes) { printk("minix_free_inode: inode 0 or nonexistent inode\n"); - goto out; + return; } bit = ino & ((1<<k) - 1); ino >>= k; if (ino >= sbi->s_imap_blocks) { printk("minix_free_inode: nonexistent imap in superblock\n"); - goto out; + return; } minix_clear_inode(inode); /* clear on-disk copy */ @@ -217,8 +217,6 @@ void minix_free_inode(struct inode * inode) printk("minix_free_inode: bit %lu already cleared\n", bit); spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); - out: - clear_inode(inode); /* clear in-memory copy */ } struct inode *minix_new_inode(const struct inode *dir, int mode, int *error) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 125062f55ef..e39d6bf2e8f 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -24,12 +24,17 @@ static int minix_write_inode(struct inode *inode, static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); static int minix_remount (struct super_block * sb, int * flags, char * data); -static void minix_delete_inode(struct inode *inode) +static void minix_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - inode->i_size = 0; - minix_truncate(inode); - minix_free_inode(inode); + if (!inode->i_nlink) { + inode->i_size = 0; + minix_truncate(inode); + } + invalidate_inode_buffers(inode); + end_writeback(inode); + if (!inode->i_nlink) + minix_free_inode(inode); } static void minix_put_super(struct super_block *sb) @@ -96,7 +101,7 @@ static const struct super_operations minix_sops = { .alloc_inode = minix_alloc_inode, .destroy_inode = minix_destroy_inode, .write_inode = minix_write_inode, - .delete_inode = 
minix_delete_inode, + .evict_inode = minix_evict_inode, .put_super = minix_put_super, .statfs = minix_statfs, .remount_fs = minix_remount, -- cgit v1.2.3-70-g09d2 From 3889717d2851bf38015c0b291026c07c02264623 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 22 Jul 2010 01:13:36 +0400 Subject: ext2: switch to dquot_free_block_nodirty() brute-force conversion Signed-off-by: Al Viro --- fs/ext2/balloc.c | 12 ++++++++---- fs/ext2/xattr.c | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index e8766a39677..db69c1206f1 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -571,7 +571,8 @@ do_more: error_return: brelse(bitmap_bh); release_blocks(sb, freed); - dquot_free_block(inode, freed); + dquot_free_block_nodirty(inode, freed); + mark_inode_dirty(inode); } /** @@ -1418,7 +1419,8 @@ allocated: *errp = 0; brelse(bitmap_bh); - dquot_free_block(inode, *count-num); + dquot_free_block_nodirty(inode, *count-num); + mark_inode_dirty(inode); *count = num; return ret_block; @@ -1428,8 +1430,10 @@ out: /* * Undo the block allocation */ - if (!performed_allocation) - dquot_free_block(inode, *count); + if (!performed_allocation) { + dquot_free_block_nodirty(inode, *count); + mark_inode_dirty(inode); + } brelse(bitmap_bh); return 0; } diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 7c3915780b1..0fa24e814d8 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -703,8 +703,10 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, * written (only some dirty data were not) so we just proceed * as if nothing happened and cleanup the unused block */ if (error && error != -ENOSPC) { - if (new_bh && new_bh != old_bh) - dquot_free_block(inode, 1); + if (new_bh && new_bh != old_bh) { + dquot_free_block_nodirty(inode, 1); + mark_inode_dirty(inode); + } goto cleanup; } } else @@ -736,7 +738,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, le32_add_cpu(&HDR(old_bh)->h_refcount, -1); if 
(ce) mb_cache_entry_release(ce); - dquot_free_block(inode, 1); + dquot_free_block_nodirty(inode, 1); + mark_inode_dirty(inode); mark_buffer_dirty(old_bh); ea_bdebug(old_bh, "refcount now=%d", le32_to_cpu(HDR(old_bh)->h_refcount)); @@ -799,7 +802,8 @@ ext2_xattr_delete_inode(struct inode *inode) mark_buffer_dirty(bh); if (IS_SYNC(inode)) sync_dirty_buffer(bh); - dquot_free_block(inode, 1); + dquot_free_block_nodirty(inode, 1); + mark_inode_dirty(inode); } EXT2_I(inode)->i_file_acl = 0; -- cgit v1.2.3-70-g09d2 From addacc7d6f0f0bcce12adf9fe9e6455e1dfd74da Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 22 Jul 2010 01:19:42 +0400 Subject: Take dirtying the inode to callers of ext2_free_blocks() Signed-off-by: Al Viro --- fs/ext2/balloc.c | 1 - fs/ext2/inode.c | 6 ++++-- fs/ext2/xattr.c | 3 +++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index db69c1206f1..c6c684b44ea 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -572,7 +572,6 @@ error_return: brelse(bitmap_bh); release_blocks(sb, freed); dquot_free_block_nodirty(inode, freed); - mark_inode_dirty(inode); } /** diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 069620b30d4..e8af26dd671 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -423,6 +423,8 @@ static int ext2_alloc_blocks(struct inode *inode, failed_out: for (i = 0; i 0) { - mark_inode_dirty(inode); ext2_free_blocks (inode, block_to_free, count); + mark_inode_dirty(inode); } } diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 0fa24e814d8..25ff041058a 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -674,6 +674,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, new_bh = sb_getblk(sb, block); if (!new_bh) { ext2_free_blocks(inode, block, 1); + mark_inode_dirty(inode); error = -EIO; goto cleanup; } @@ -729,6 +730,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, mb_cache_entry_free(ce); ea_bdebug(old_bh, "freeing"); ext2_free_blocks(inode, 
old_bh->b_blocknr, 1); + mark_inode_dirty(inode); /* We let our caller release old_bh, so we * need to duplicate the buffer before. */ get_bh(old_bh); @@ -789,6 +791,7 @@ ext2_xattr_delete_inode(struct inode *inode) if (ce) mb_cache_entry_free(ce); ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); + mark_inode_dirty(inode); get_bh(bh); bforget(bh); unlock_buffer(bh); -- cgit v1.2.3-70-g09d2 From 3937871d91e4f43e4aaf0b214c68a7857c0e6e80 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 22 Jul 2010 01:22:47 +0400 Subject: Don't dirty the victim in ext2_xattr_delete_inode() ... it's beyond fs-writeback reach already - writeback won't be started at that point. Signed-off-by: Al Viro --- fs/ext2/xattr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 25ff041058a..5ab87e6edff 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -791,7 +791,6 @@ ext2_xattr_delete_inode(struct inode *inode) if (ce) mb_cache_entry_free(ce); ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); - mark_inode_dirty(inode); get_bh(bh); bforget(bh); unlock_buffer(bh); @@ -806,7 +805,6 @@ ext2_xattr_delete_inode(struct inode *inode) if (IS_SYNC(inode)) sync_dirty_buffer(bh); dquot_free_block_nodirty(inode, 1); - mark_inode_dirty(inode); } EXT2_I(inode)->i_file_acl = 0; -- cgit v1.2.3-70-g09d2 From 72edc4d0873ba5165c0759264298bf5f55351c7a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Jun 2010 23:32:28 -0400 Subject: merge ext2 delete_inode and clear_inode, switch to ->evict_inode() Signed-off-by: Al Viro --- fs/ext2/ext2.h | 2 +- fs/ext2/ialloc.c | 13 ++++--------- fs/ext2/inode.c | 44 ++++++++++++++++++++++++++++++-------------- fs/ext2/super.c | 14 +------------- 4 files changed, 36 insertions(+), 37 deletions(-) diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 8f53d11bf95..416daa62242 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -119,7 +119,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned); /* inode.c */ 
extern struct inode *ext2_iget (struct super_block *, unsigned long); extern int ext2_write_inode (struct inode *, struct writeback_control *); -extern void ext2_delete_inode (struct inode *); +extern void ext2_evict_inode(struct inode *); extern int ext2_sync_inode (struct inode *); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); extern int ext2_setattr (struct dentry *, struct iattr *); diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 938dbc739d0..ad70479aabf 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -118,19 +118,14 @@ void ext2_free_inode (struct inode * inode) * Note: we must free any quota before locking the superblock, * as writing the quota to disk may need the lock as well. */ - if (!is_bad_inode(inode)) { - /* Quota is already initialized in iput() */ - ext2_xattr_delete_inode(inode); - dquot_free_inode(inode); - dquot_drop(inode); - } + /* Quota is already initialized in iput() */ + ext2_xattr_delete_inode(inode); + dquot_free_inode(inode); + dquot_drop(inode); es = EXT2_SB(sb)->s_es; is_directory = S_ISDIR(inode->i_mode); - /* Do this BEFORE marking the inode not in use or returning an error */ - clear_inode (inode); - if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { ext2_error (sb, "ext2_free_inode", diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index e8af26dd671..940c9616886 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -69,26 +69,42 @@ static void ext2_write_failed(struct address_space *mapping, loff_t to) /* * Called at the last iput() if i_nlink is zero. 
*/ -void ext2_delete_inode (struct inode * inode) +void ext2_evict_inode(struct inode * inode) { - if (!is_bad_inode(inode)) + struct ext2_block_alloc_info *rsv; + int want_delete = 0; + + if (!inode->i_nlink && !is_bad_inode(inode)) { + want_delete = 1; dquot_initialize(inode); + } else { + dquot_drop(inode); + } + truncate_inode_pages(&inode->i_data, 0); - if (is_bad_inode(inode)) - goto no_delete; - EXT2_I(inode)->i_dtime = get_seconds(); - mark_inode_dirty(inode); - __ext2_write_inode(inode, inode_needs_sync(inode)); + if (want_delete) { + /* set dtime */ + EXT2_I(inode)->i_dtime = get_seconds(); + mark_inode_dirty(inode); + __ext2_write_inode(inode, inode_needs_sync(inode)); + /* truncate to 0 */ + inode->i_size = 0; + if (inode->i_blocks) + ext2_truncate_blocks(inode, 0); + } - inode->i_size = 0; - if (inode->i_blocks) - ext2_truncate_blocks(inode, 0); - ext2_free_inode (inode); + invalidate_inode_buffers(inode); + end_writeback(inode); + + ext2_discard_reservation(inode); + rsv = EXT2_I(inode)->i_block_alloc_info; + EXT2_I(inode)->i_block_alloc_info = NULL; + if (unlikely(rsv)) + kfree(rsv); - return; -no_delete: - clear_inode(inode); /* We must guarantee clearing of inode... 
*/ + if (want_delete) + ext2_free_inode(inode); } typedef struct { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7ff43f4a59c..1ec602673ea 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -195,17 +195,6 @@ static void destroy_inodecache(void) kmem_cache_destroy(ext2_inode_cachep); } -static void ext2_clear_inode(struct inode *inode) -{ - struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info; - - dquot_drop(inode); - ext2_discard_reservation(inode); - EXT2_I(inode)->i_block_alloc_info = NULL; - if (unlikely(rsv)) - kfree(rsv); -} - static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct super_block *sb = vfs->mnt_sb; @@ -299,13 +288,12 @@ static const struct super_operations ext2_sops = { .alloc_inode = ext2_alloc_inode, .destroy_inode = ext2_destroy_inode, .write_inode = ext2_write_inode, - .delete_inode = ext2_delete_inode, + .evict_inode = ext2_evict_inode, .put_super = ext2_put_super, .write_super = ext2_write_super, .sync_fs = ext2_sync_fs, .statfs = ext2_statfs, .remount_fs = ext2_remount, - .clear_inode = ext2_clear_inode, .show_options = ext2_show_options, #ifdef CONFIG_QUOTA .quota_read = ext2_quota_read, -- cgit v1.2.3-70-g09d2 From 6d8af64c1c5ad4fd119b371ae70c114ee1f443b1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Jun 2010 16:29:45 -0400 Subject: switch mqueue to ->evict_inode() ... and since the inodes are never hashed, we can use default ->drop_inode() just fine. 
Signed-off-by: Al Viro --- ipc/mqueue.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index c93fd3faac2..c60e519e291 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -158,7 +158,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, u->mq_bytes + mq_bytes > task_rlimit(p, RLIMIT_MSGQUEUE)) { spin_unlock(&mq_lock); - /* mqueue_delete_inode() releases info->messages */ + /* mqueue_evict_inode() releases info->messages */ goto out_inode; } u->mq_bytes += mq_bytes; @@ -241,7 +241,7 @@ static void mqueue_destroy_inode(struct inode *inode) kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode)); } -static void mqueue_delete_inode(struct inode *inode) +static void mqueue_evict_inode(struct inode *inode) { struct mqueue_inode_info *info; struct user_struct *user; @@ -249,10 +249,11 @@ static void mqueue_delete_inode(struct inode *inode) int i; struct ipc_namespace *ipc_ns; - if (S_ISDIR(inode->i_mode)) { - clear_inode(inode); + end_writeback(inode); + + if (S_ISDIR(inode->i_mode)) return; - } + ipc_ns = get_ns_from_inode(inode); info = MQUEUE_I(inode); spin_lock(&info->lock); @@ -261,8 +262,6 @@ static void mqueue_delete_inode(struct inode *inode) kfree(info->messages); spin_unlock(&info->lock); - clear_inode(inode); - /* Total amount of bytes accounted for the mqueue */ mq_bytes = info->attr.mq_maxmsg * (sizeof(struct msg_msg *) + info->attr.mq_msgsize); @@ -1225,9 +1224,8 @@ static const struct file_operations mqueue_file_operations = { static const struct super_operations mqueue_super_ops = { .alloc_inode = mqueue_alloc_inode, .destroy_inode = mqueue_destroy_inode, + .evict_inode = mqueue_evict_inode, .statfs = simple_statfs, - .delete_inode = mqueue_delete_inode, - .drop_inode = generic_delete_inode, }; static struct file_system_type mqueue_fs_type = { -- cgit v1.2.3-70-g09d2 From 1f895f75dc0881592ef21488aac36cfb2b6ca1e3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Jun 2010 19:10:41 -0400 
Subject: switch shmem.c to ->evict_inode() Signed-off-by: Al Viro --- mm/shmem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 33222ba256f..1529d8fdc23 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -820,7 +820,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) return error; } -static void shmem_delete_inode(struct inode *inode) +static void shmem_evict_inode(struct inode *inode) { struct shmem_inode_info *info = SHMEM_I(inode); @@ -837,7 +837,7 @@ static void shmem_delete_inode(struct inode *inode) } BUG_ON(inode->i_blocks); shmem_free_inode(inode->i_sb); - clear_inode(inode); + end_writeback(inode); } static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) @@ -934,7 +934,7 @@ found: /* * Move _head_ to start search for next from here. - * But be careful: shmem_delete_inode checks list_empty without taking + * But be careful: shmem_evict_inode checks list_empty without taking * mutex, and there's an instant in list_move_tail when info->swaplist * would appear empty, if it were the only one on shmem_swaplist. We * could avoid doing it if inode NULL; or use this minor optimization. 
@@ -2497,7 +2497,7 @@ static const struct super_operations shmem_ops = { .remount_fs = shmem_remount_fs, .show_options = shmem_show_options, #endif - .delete_inode = shmem_delete_inode, + .evict_inode = shmem_evict_inode, .drop_inode = generic_delete_inode, .put_super = shmem_put_super, }; -- cgit v1.2.3-70-g09d2 From d299eadc098743ea0cfbf9502fb04abf1d39ce36 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Jun 2010 19:16:20 -0400 Subject: switch sysv to ->evict_inode() Signed-off-by: Al Viro --- fs/sysv/ialloc.c | 1 - fs/sysv/inode.c | 15 ++++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index fcc498ec9b3..0c96c98bd1d 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c @@ -113,7 +113,6 @@ void sysv_free_inode(struct inode * inode) return; } raw_inode = sysv_raw_inode(sb, ino, &bh); - clear_inode(inode); if (!raw_inode) { printk("sysv_free_inode: unable to read inode block on device " "%s\n", inode->i_sb->s_id); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index d4a5380b566..613a5056e88 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -308,12 +308,17 @@ int sysv_sync_inode(struct inode *inode) return __sysv_write_inode(inode, 1); } -static void sysv_delete_inode(struct inode *inode) +static void sysv_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - inode->i_size = 0; - sysv_truncate(inode); - sysv_free_inode(inode); + if (!inode->i_nlink) { + inode->i_size = 0; + sysv_truncate(inode); + } + invalidate_inode_buffers(inode); + end_writeback(inode); + if (!inode->i_nlink) + sysv_free_inode(inode); } static struct kmem_cache *sysv_inode_cachep; @@ -344,7 +349,7 @@ const struct super_operations sysv_sops = { .alloc_inode = sysv_alloc_inode, .destroy_inode = sysv_destroy_inode, .write_inode = sysv_write_inode, - .delete_inode = sysv_delete_inode, + .evict_inode = sysv_evict_inode, .put_super = sysv_put_super, .write_super = sysv_write_super, .sync_fs = sysv_sync_fs, -- 
cgit v1.2.3-70-g09d2 From d3b4f9ae184b0a3982dbe000ddf88952f090dc28 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Jun 2010 19:22:50 -0400 Subject: switch smbfs to evict_inode() NB: treatment of inode hash is completely braindead there Signed-off-by: Al Viro --- fs/smbfs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index e338f0a5a70..450c9194198 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -46,7 +46,7 @@ #define SMB_TTL_DEFAULT 1000 -static void smb_delete_inode(struct inode *); +static void smb_evict_inode(struct inode *); static void smb_put_super(struct super_block *); static int smb_statfs(struct dentry *, struct kstatfs *); static int smb_show_options(struct seq_file *, struct vfsmount *); @@ -102,7 +102,7 @@ static const struct super_operations smb_sops = .alloc_inode = smb_alloc_inode, .destroy_inode = smb_destroy_inode, .drop_inode = generic_delete_inode, - .delete_inode = smb_delete_inode, + .evict_inode = smb_evict_inode, .put_super = smb_put_super, .statfs = smb_statfs, .show_options = smb_show_options, @@ -324,15 +324,15 @@ out: * All blocking cleanup operations need to go here to avoid races. 
*/ static void -smb_delete_inode(struct inode *ino) +smb_evict_inode(struct inode *ino) { DEBUG1("ino=%ld\n", ino->i_ino); truncate_inode_pages(&ino->i_data, 0); + end_writeback(ino); lock_kernel(); if (smb_close(ino)) PARANOIA("could not close inode %ld\n", ino->i_ino); unlock_kernel(); - clear_inode(ino); } static struct option opts[] = { -- cgit v1.2.3-70-g09d2 From deee3ce466a3e2cfb54c93b8fd22bbccd19ea7d6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Jun 2010 19:28:32 -0400 Subject: convert fatfs to ->evict_inode() Signed-off-by: Al Viro --- fs/fat/inode.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index ec6a699a402..830058057d3 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -263,7 +263,7 @@ static const struct address_space_operations fat_aops = { * check if the location is still valid and retry if it * isn't. Otherwise we do changes. * 5. Spinlock is used to protect hash/unhash/location check/lookup - * 6. fat_clear_inode() unhashes the F-d-c entry. + * 6. fat_evict_inode() unhashes the F-d-c entry. * 7. lookup() and readdir() do igrab() if they find a F-d-c entry * and consider negative result as cache miss. 
*/ @@ -448,16 +448,15 @@ out: EXPORT_SYMBOL_GPL(fat_build_inode); -static void fat_delete_inode(struct inode *inode) +static void fat_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - inode->i_size = 0; - fat_truncate_blocks(inode, 0); - clear_inode(inode); -} - -static void fat_clear_inode(struct inode *inode) -{ + if (!inode->i_nlink) { + inode->i_size = 0; + fat_truncate_blocks(inode, 0); + } + invalidate_inode_buffers(inode); + end_writeback(inode); fat_cache_inval_inode(inode); fat_detach(inode); } @@ -674,12 +673,11 @@ static const struct super_operations fat_sops = { .alloc_inode = fat_alloc_inode, .destroy_inode = fat_destroy_inode, .write_inode = fat_write_inode, - .delete_inode = fat_delete_inode, + .evict_inode = fat_evict_inode, .put_super = fat_put_super, .write_super = fat_write_super, .sync_fs = fat_sync_fs, .statfs = fat_statfs, - .clear_inode = fat_clear_inode, .remount_fs = fat_remount, .show_options = fat_show_options, -- cgit v1.2.3-70-g09d2 From 58e8268c7bae538ccb8b7eccc817c1c28bcd4da2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Jun 2010 19:40:56 -0400 Subject: switch ufs to ->evict_inode() Signed-off-by: Al Viro --- fs/ufs/ialloc.c | 2 -- fs/ufs/inode.c | 43 ++++++++++++++++++++++++++----------------- fs/ufs/super.c | 2 +- fs/ufs/ufs.h | 2 +- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 594480e537d..428017e018f 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -94,8 +94,6 @@ void ufs_free_inode (struct inode * inode) is_directory = S_ISDIR(inode->i_mode); - clear_inode (inode); - if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit)) ufs_error(sb, "ufs_free_inode", "bit already cleared for inode %u", ino); else { diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 45cafa937a4..2b251f2093a 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -911,24 +911,33 @@ int ufs_sync_inode (struct inode *inode) return ufs_update_inode (inode, 1); } -void 
ufs_delete_inode (struct inode * inode) +void ufs_evict_inode(struct inode * inode) { - loff_t old_i_size; + int want_delete = 0; + + if (!inode->i_nlink && !is_bad_inode(inode)) + want_delete = 1; truncate_inode_pages(&inode->i_data, 0); - if (is_bad_inode(inode)) - goto no_delete; - /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ - lock_kernel(); - mark_inode_dirty(inode); - ufs_update_inode(inode, IS_SYNC(inode)); - old_i_size = inode->i_size; - inode->i_size = 0; - if (inode->i_blocks && ufs_truncate(inode, old_i_size)) - ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n"); - ufs_free_inode (inode); - unlock_kernel(); - return; -no_delete: - clear_inode(inode); /* We must guarantee clearing of inode... */ + if (want_delete) { + loff_t old_i_size; + /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ + lock_kernel(); + mark_inode_dirty(inode); + ufs_update_inode(inode, IS_SYNC(inode)); + old_i_size = inode->i_size; + inode->i_size = 0; + if (inode->i_blocks && ufs_truncate(inode, old_i_size)) + ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n"); + unlock_kernel(); + } + + invalidate_inode_buffers(inode); + end_writeback(inode); + + if (want_delete) { + lock_kernel(); + ufs_free_inode (inode); + unlock_kernel(); + } } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 3ec5a9eb6ef..d510c1b9181 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1440,7 +1440,7 @@ static const struct super_operations ufs_super_ops = { .alloc_inode = ufs_alloc_inode, .destroy_inode = ufs_destroy_inode, .write_inode = ufs_write_inode, - .delete_inode = ufs_delete_inode, + .evict_inode = ufs_evict_inode, .put_super = ufs_put_super, .write_super = ufs_write_super, .sync_fs = ufs_sync_fs, diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 179ae6b3180..c08782e1b48 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -108,7 +108,7 @@ extern struct inode * ufs_new_inode (struct inode *, int); extern struct inode *ufs_iget(struct super_block *, unsigned long); extern int ufs_write_inode (struct 
inode *, struct writeback_control *); extern int ufs_sync_inode (struct inode *); -extern void ufs_delete_inode (struct inode *); +extern void ufs_evict_inode (struct inode *); extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create); -- cgit v1.2.3-70-g09d2 From 0f3f63a4700d7c5aed51491b0113a257cf290a56 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Jun 2010 21:20:32 -0400 Subject: spufs conversion to ->evict_inode() Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/inode.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 32625f366fb..5dec408d670 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -143,15 +143,14 @@ out: } static void -spufs_delete_inode(struct inode *inode) +spufs_evict_inode(struct inode *inode) { struct spufs_inode_info *ei = SPUFS_I(inode); - + end_writeback(inode); if (ei->i_ctx) put_spu_context(ei->i_ctx); if (ei->i_gang) put_spu_gang(ei->i_gang); - clear_inode(inode); } static void spufs_prune_dir(struct dentry *dir) @@ -779,8 +778,7 @@ spufs_fill_super(struct super_block *sb, void *data, int silent) .alloc_inode = spufs_alloc_inode, .destroy_inode = spufs_destroy_inode, .statfs = simple_statfs, - .delete_inode = spufs_delete_inode, - .drop_inode = generic_delete_inode, + .evict_inode = spufs_evict_inode, .show_options = generic_show_options, }; -- cgit v1.2.3-70-g09d2 From ac14a95b5239d37b6082c3791b88d7ab4e8e444c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 07:08:19 -0400 Subject: convert ext3 to ->evict_inode() Signed-off-by: Al Viro --- fs/ext3/ialloc.c | 12 ------------ fs/ext3/inode.c | 37 +++++++++++++++++++++++++++---------- fs/ext3/super.c | 14 +------------- include/linux/ext3_fs.h | 2 +- 4 files 
changed, 29 insertions(+), 36 deletions(-) diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 498021eb88f..4ab72db3559 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -119,20 +119,8 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) ino = inode->i_ino; ext3_debug ("freeing inode %lu\n", ino); - /* - * Note: we must free any quota before locking the superblock, - * as writing the quota to disk may need the lock as well. - */ - dquot_initialize(inode); - ext3_xattr_delete_inode(handle, inode); - dquot_free_inode(inode); - dquot_drop(inode); - is_directory = S_ISDIR(inode->i_mode); - /* Do this BEFORE marking the inode not in use or returning an error */ - clear_inode (inode); - es = EXT3_SB(sb)->s_es; if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { ext3_error (sb, "ext3_free_inode", diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index b04d1193668..cc55cecf9fb 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -190,18 +190,28 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode) } /* - * Called at the last iput() if i_nlink is zero. + * Called at inode eviction from icache */ -void ext3_delete_inode (struct inode * inode) +void ext3_evict_inode (struct inode *inode) { + struct ext3_block_alloc_info *rsv; handle_t *handle; + int want_delete = 0; - if (!is_bad_inode(inode)) + if (!inode->i_nlink && !is_bad_inode(inode)) { dquot_initialize(inode); + want_delete = 1; + } truncate_inode_pages(&inode->i_data, 0); - if (is_bad_inode(inode)) + ext3_discard_reservation(inode); + rsv = EXT3_I(inode)->i_block_alloc_info; + EXT3_I(inode)->i_block_alloc_info = NULL; + if (unlikely(rsv)) + kfree(rsv); + + if (!want_delete) goto no_delete; handle = start_transaction(inode); @@ -238,15 +248,22 @@ void ext3_delete_inode (struct inode * inode) * having errors), but we can't free the inode if the mark_dirty * fails. 
*/ - if (ext3_mark_inode_dirty(handle, inode)) - /* If that failed, just do the required in-core inode clear. */ - clear_inode(inode); - else + if (ext3_mark_inode_dirty(handle, inode)) { + /* If that failed, just dquot_drop() and be done with that */ + dquot_drop(inode); + end_writeback(inode); + } else { + ext3_xattr_delete_inode(handle, inode); + dquot_free_inode(inode); + dquot_drop(inode); + end_writeback(inode); ext3_free_inode(handle, inode); + } ext3_journal_stop(handle); return; no_delete: - clear_inode(inode); /* We must guarantee clearing of inode... */ + end_writeback(inode); + dquot_drop(inode); } typedef struct { @@ -2564,7 +2581,7 @@ out_stop: * If this was a simple ftruncate(), and the file will remain alive * then we need to clear up the orphan record which we created above. * However, if this was a real unlink then we were called by - * ext3_delete_inode(), and we allow that function to clean up the + * ext3_evict_inode(), and we allow that function to clean up the * orphan info for us. 
*/ if (inode->i_nlink) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6c953bb255e..a951fd5c081 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -527,17 +527,6 @@ static void destroy_inodecache(void) kmem_cache_destroy(ext3_inode_cachep); } -static void ext3_clear_inode(struct inode *inode) -{ - struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; - - dquot_drop(inode); - ext3_discard_reservation(inode); - EXT3_I(inode)->i_block_alloc_info = NULL; - if (unlikely(rsv)) - kfree(rsv); -} - static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) { #if defined(CONFIG_QUOTA) @@ -783,14 +772,13 @@ static const struct super_operations ext3_sops = { .destroy_inode = ext3_destroy_inode, .write_inode = ext3_write_inode, .dirty_inode = ext3_dirty_inode, - .delete_inode = ext3_delete_inode, + .evict_inode = ext3_evict_inode, .put_super = ext3_put_super, .sync_fs = ext3_sync_fs, .freeze_fs = ext3_freeze, .unfreeze_fs = ext3_unfreeze, .statfs = ext3_statfs, .remount_fs = ext3_remount, - .clear_inode = ext3_clear_inode, .show_options = ext3_show_options, #ifdef CONFIG_QUOTA .quota_read = ext3_quota_read, diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 7fc62d4550b..e7cb2176699 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -896,7 +896,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, extern struct inode *ext3_iget(struct super_block *, unsigned long); extern int ext3_write_inode (struct inode *, struct writeback_control *); extern int ext3_setattr (struct dentry *, struct iattr *); -extern void ext3_delete_inode (struct inode *); +extern void ext3_evict_inode (struct inode *); extern int ext3_sync_inode (handle_t *, struct inode *); extern void ext3_discard_reservation (struct inode *); extern void ext3_dirty_inode(struct inode *); -- cgit v1.2.3-70-g09d2 From 9df2f85128def59185f8a1c584f8af41df17405a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 
2010 09:50:39 -0400 Subject: switch bfs to ->evict_inode(), clean up Signed-off-by: Al Viro --- fs/bfs/inode.c | 70 +++++++++++++++++++++++++++------------------------------- 1 file changed, 32 insertions(+), 38 deletions(-) diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index f22a7d3dc36..0499822b156 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -99,6 +99,24 @@ error: return ERR_PTR(-EIO); } +static struct bfs_inode *find_inode(struct super_block *sb, u16 ino, struct buffer_head **p) +{ + if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(sb)->si_lasti)) { + printf("Bad inode number %s:%08x\n", sb->s_id, ino); + return ERR_PTR(-EIO); + } + + ino -= BFS_ROOT_INO; + + *p = sb_bread(sb, 1 + ino / BFS_INODES_PER_BLOCK); + if (!*p) { + printf("Unable to read inode %s:%08x\n", sb->s_id, ino); + return ERR_PTR(-EIO); + } + + return (struct bfs_inode *)(*p)->b_data + ino % BFS_INODES_PER_BLOCK; +} + static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct bfs_sb_info *info = BFS_SB(inode->i_sb); @@ -106,28 +124,15 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) unsigned long i_sblock; struct bfs_inode *di; struct buffer_head *bh; - int block, off; int err = 0; dprintf("ino=%08x\n", ino); - if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(inode->i_sb)->si_lasti)) { - printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino); - return -EIO; - } + di = find_inode(inode->i_sb, ino, &bh); + if (IS_ERR(di)) + return PTR_ERR(di); mutex_lock(&info->bfs_lock); - block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; - bh = sb_bread(inode->i_sb, block); - if (!bh) { - printf("Unable to read inode %s:%08x\n", - inode->i_sb->s_id, ino); - mutex_unlock(&info->bfs_lock); - return -EIO; - } - - off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; - di = (struct bfs_inode *)bh->b_data + off; if (ino == BFS_ROOT_INO) di->i_vtype = cpu_to_le32(BFS_VDIR); @@ -158,12 +163,11 @@ static int bfs_write_inode(struct inode *inode, struct 
writeback_control *wbc) return err; } -static void bfs_delete_inode(struct inode *inode) +static void bfs_evict_inode(struct inode *inode) { unsigned long ino = inode->i_ino; struct bfs_inode *di; struct buffer_head *bh; - int block, off; struct super_block *s = inode->i_sb; struct bfs_sb_info *info = BFS_SB(s); struct bfs_inode_info *bi = BFS_I(inode); @@ -171,28 +175,19 @@ static void bfs_delete_inode(struct inode *inode) dprintf("ino=%08lx\n", ino); truncate_inode_pages(&inode->i_data, 0); + invalidate_inode_buffers(inode); + end_writeback(inode); - if ((ino < BFS_ROOT_INO) || (ino > info->si_lasti)) { - printf("invalid ino=%08lx\n", ino); + if (inode->i_nlink) return; - } - - inode->i_size = 0; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; - mutex_lock(&info->bfs_lock); - mark_inode_dirty(inode); - block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; - bh = sb_bread(s, block); - if (!bh) { - printf("Unable to read inode %s:%08lx\n", - inode->i_sb->s_id, ino); - mutex_unlock(&info->bfs_lock); + di = find_inode(s, inode->i_ino, &bh); + if (IS_ERR(di)) return; - } - off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; - di = (struct bfs_inode *)bh->b_data + off; - memset((void *)di, 0, sizeof(struct bfs_inode)); + + mutex_lock(&info->bfs_lock); + /* clear on-disk inode */ + memset(di, 0, sizeof(struct bfs_inode)); mark_buffer_dirty(bh); brelse(bh); @@ -214,7 +209,6 @@ static void bfs_delete_inode(struct inode *inode) mark_buffer_dirty(info->si_sbh); } mutex_unlock(&info->bfs_lock); - clear_inode(inode); } static int bfs_sync_fs(struct super_block *sb, int wait) @@ -319,7 +313,7 @@ static const struct super_operations bfs_sops = { .alloc_inode = bfs_alloc_inode, .destroy_inode = bfs_destroy_inode, .write_inode = bfs_write_inode, - .delete_inode = bfs_delete_inode, + .evict_inode = bfs_evict_inode, .put_super = bfs_put_super, .write_super = bfs_write_super, .sync_fs = bfs_sync_fs, -- cgit v1.2.3-70-g09d2 From 
69c9e750176b409559b2361fbb28fa7bbf3c5461 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 10:12:01 -0400 Subject: switch omfs to ->evict_inode() Signed-off-by: Al Viro --- fs/omfs/inode.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 089839a6cc6..56121debc22 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -175,9 +175,13 @@ int omfs_sync_inode(struct inode *inode) * called when an entry is deleted, need to clear the bits in the * bitmaps. */ -static void omfs_delete_inode(struct inode *inode) +static void omfs_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); + + if (inode->i_nlink) + return; if (S_ISREG(inode->i_mode)) { inode->i_size = 0; @@ -185,7 +189,6 @@ static void omfs_delete_inode(struct inode *inode) } omfs_clear_range(inode->i_sb, inode->i_ino, 2); - clear_inode(inode); } struct inode *omfs_iget(struct super_block *sb, ino_t ino) @@ -284,7 +287,7 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf) static const struct super_operations omfs_sops = { .write_inode = omfs_write_inode, - .delete_inode = omfs_delete_inode, + .evict_inode = omfs_evict_inode, .put_super = omfs_put_super, .statfs = omfs_statfs, .show_options = generic_show_options, -- cgit v1.2.3-70-g09d2 From f053ddde7575090e09e2f5c4233d8a19f0925b93 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 10:16:41 -0400 Subject: switch affs to ->evict_inode() Signed-off-by: Al Viro --- fs/affs/affs.h | 3 +-- fs/affs/inode.c | 25 ++++++++++++------------- fs/affs/super.c | 3 +-- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/fs/affs/affs.h b/fs/affs/affs.h index f05b6155ccc..a8cbdeb3402 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -171,8 +171,7 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry, extern unsigned long affs_parent_ino(struct inode *dir); extern struct inode *affs_new_inode(struct 
inode *dir); extern int affs_notify_change(struct dentry *dentry, struct iattr *attr); -extern void affs_delete_inode(struct inode *inode); -extern void affs_clear_inode(struct inode *inode); +extern void affs_evict_inode(struct inode *inode); extern struct inode *affs_iget(struct super_block *sb, unsigned long ino); extern int affs_write_inode(struct inode *inode, diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 6883d5fb84c..3a0fdec175b 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -252,23 +252,19 @@ out: } void -affs_delete_inode(struct inode *inode) -{ - pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); - truncate_inode_pages(&inode->i_data, 0); - inode->i_size = 0; - affs_truncate(inode); - clear_inode(inode); - affs_free_block(inode->i_sb, inode->i_ino); -} - -void -affs_clear_inode(struct inode *inode) +affs_evict_inode(struct inode *inode) { unsigned long cache_page; + pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); + truncate_inode_pages(&inode->i_data, 0); - pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); + if (!inode->i_nlink) { + inode->i_size = 0; + affs_truncate(inode); + } + invalidate_inode_buffers(inode); + end_writeback(inode); affs_free_prealloc(inode); cache_page = (unsigned long)AFFS_I(inode)->i_lc; if (cache_page) { @@ -280,6 +276,9 @@ affs_clear_inode(struct inode *inode) affs_brelse(AFFS_I(inode)->i_ext_bh); AFFS_I(inode)->i_ext_last = ~1; AFFS_I(inode)->i_ext_bh = NULL; + + if (!inode->i_nlink) + affs_free_block(inode->i_sb, inode->i_ino); } struct inode * diff --git a/fs/affs/super.c b/fs/affs/super.c index 16a3e4765f6..2c804a87c14 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -140,8 +140,7 @@ static const struct super_operations affs_sops = { .alloc_inode = affs_alloc_inode, .destroy_inode = affs_destroy_inode, .write_inode = affs_write_inode, - .delete_inode = affs_delete_inode, - .clear_inode = affs_clear_inode, + 
.evict_inode = affs_evict_inode, .put_super = affs_put_super, .write_super = affs_write_super, .sync_fs = affs_sync_fs, -- cgit v1.2.3-70-g09d2 From e971a6d7b9daebfe2c11c590377d3933410ab929 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 15:16:17 -0400 Subject: stop icache pollution in hostfs, switch to ->evict_inode() Signed-off-by: Al Viro --- fs/hostfs/hostfs_kern.c | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 7943ff11d48..fab5f5a1e6f 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -241,16 +241,13 @@ static struct inode *hostfs_iget(struct super_block *sb) struct inode *inode; long ret; - inode = iget_locked(sb, 0); + inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); - if (inode->i_state & I_NEW) { - ret = hostfs_read_inode(inode); - if (ret < 0) { - iget_failed(inode); - return ERR_PTR(ret); - } - unlock_new_inode(inode); + ret = hostfs_read_inode(inode); + if (ret < 0) { + iput(inode); + return ERR_PTR(ret); } return inode; } @@ -299,29 +296,19 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb) return &hi->vfs_inode; } -static void hostfs_delete_inode(struct inode *inode) +static void hostfs_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); if (HOSTFS_I(inode)->fd != -1) { close_file(&HOSTFS_I(inode)->fd); HOSTFS_I(inode)->fd = -1; } - clear_inode(inode); } static void hostfs_destroy_inode(struct inode *inode) { kfree(HOSTFS_I(inode)->host_filename); - - /* - * XXX: This should not happen, probably. The check is here for - * additional safety. 
- */ - if (HOSTFS_I(inode)->fd != -1) { - close_file(&HOSTFS_I(inode)->fd); - printk(KERN_DEBUG "Closing host fd in .destroy_inode\n"); - } - kfree(HOSTFS_I(inode)); } @@ -339,9 +326,8 @@ static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) static const struct super_operations hostfs_sbops = { .alloc_inode = hostfs_alloc_inode, - .drop_inode = generic_delete_inode, - .delete_inode = hostfs_delete_inode, .destroy_inode = hostfs_destroy_inode, + .evict_inode = hostfs_evict_inode, .statfs = hostfs_statfs, .show_options = hostfs_show_options, }; -- cgit v1.2.3-70-g09d2 From 601d2c38b93130d387091c28d13abea40924e518 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 17:53:01 -0400 Subject: hostfs: don't keep a field in each inode when we are using it only in root Signed-off-by: Al Viro --- fs/hostfs/hostfs_kern.c | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index fab5f5a1e6f..7e6750499b8 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -19,7 +19,6 @@ #include "kern.h" struct hostfs_inode_info { - char *host_filename; int fd; fmode_t mode; struct inode vfs_inode; @@ -103,7 +102,7 @@ static char *dentry_name(struct dentry *dentry, int extra) parent = parent->d_parent; } - root = HOSTFS_I(parent->d_inode)->host_filename; + root = parent->d_sb->s_fs_info; len += strlen(root); name = kmalloc(len + extra + 1, GFP_KERNEL); if (name == NULL) @@ -266,7 +265,7 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf) long long f_files; long long f_ffree; - err = do_statfs(HOSTFS_I(dentry->d_sb->s_root->d_inode)->host_filename, + err = do_statfs(dentry->d_sb->s_fs_info, &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), &sf->f_namelen, sf->f_spare); @@ -285,13 +284,10 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb) { struct hostfs_inode_info *hi; 
- hi = kmalloc(sizeof(*hi), GFP_KERNEL); + hi = kzalloc(sizeof(*hi), GFP_KERNEL); if (hi == NULL) return NULL; - - *hi = ((struct hostfs_inode_info) { .host_filename = NULL, - .fd = -1, - .mode = 0 }); + hi->fd = -1; inode_init_once(&hi->vfs_inode); return &hi->vfs_inode; } @@ -308,14 +304,12 @@ static void hostfs_evict_inode(struct inode *inode) static void hostfs_destroy_inode(struct inode *inode) { - kfree(HOSTFS_I(inode)->host_filename); kfree(HOSTFS_I(inode)); } static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) { - struct inode *root = vfs->mnt_sb->s_root->d_inode; - const char *root_path = HOSTFS_I(root)->host_filename; + const char *root_path = vfs->mnt_sb->s_fs_info; size_t offset = strlen(root_ino) + 1; if (strlen(root_path) > offset) @@ -978,8 +972,8 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) req_root = ""; err = -ENOMEM; - host_root_path = kmalloc(strlen(root_ino) + 1 - + strlen(req_root) + 1, GFP_KERNEL); + sb->s_fs_info = host_root_path = + kmalloc(strlen(root_ino) + strlen(req_root) + 2, GFP_KERNEL); if (host_root_path == NULL) goto out; @@ -988,20 +982,13 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) root_inode = hostfs_iget(sb); if (IS_ERR(root_inode)) { err = PTR_ERR(root_inode); - goto out_free; + goto out; } err = init_inode(root_inode, NULL); if (err) goto out_put; - HOSTFS_I(root_inode)->host_filename = host_root_path; - /* - * Avoid that in the error path, iput(root_inode) frees again - * host_root_path through hostfs_destroy_inode! 
- */ - host_root_path = NULL; - err = -ENOMEM; sb->s_root = d_alloc_root(root_inode); if (sb->s_root == NULL) @@ -1019,8 +1006,6 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) out_put: iput(root_inode); -out_free: - kfree(host_root_path); out: return err; } @@ -1032,11 +1017,17 @@ static int hostfs_read_sb(struct file_system_type *type, return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt); } +static void hostfs_kill_sb(struct super_block *s) +{ + kill_anon_super(s); + kfree(s->s_fs_info); +} + static struct file_system_type hostfs_type = { .owner = THIS_MODULE, .name = "hostfs", .get_sb = hostfs_read_sb, - .kill_sb = kill_anon_super, + .kill_sb = hostfs_kill_sb, .fs_flags = 0, }; -- cgit v1.2.3-70-g09d2 From 52b209f7b848a28987ed133dc2b48f304b1dc6b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 18:43:19 -0400 Subject: get rid of hostfs_read_inode() There are only two call sites; in one (hostfs_iget()) it's actually a no-op and in another (fill_super()) it's easier to expand the damn thing and use what we know about its arguments to simplify it. Signed-off-by: Al Viro --- fs/hostfs/hostfs_kern.c | 69 ++++++++++++------------------------------------- 1 file changed, 16 insertions(+), 53 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 7e6750499b8..25d79298a98 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -204,50 +204,11 @@ static char *follow_link(char *link) return ERR_PTR(n); } -static int hostfs_read_inode(struct inode *ino) -{ - char *name; - int err = 0; - - /* - * Unfortunately, we are called from iget() when we don't have a dentry - * allocated yet. 
- */ - if (list_empty(&ino->i_dentry)) - goto out; - - err = -ENOMEM; - name = inode_name(ino, 0); - if (name == NULL) - goto out; - - if (file_type(name, NULL, NULL) == OS_TYPE_SYMLINK) { - name = follow_link(name); - if (IS_ERR(name)) { - err = PTR_ERR(name); - goto out; - } - } - - err = read_name(ino, name); - kfree(name); - out: - return err; -} - static struct inode *hostfs_iget(struct super_block *sb) { - struct inode *inode; - long ret; - - inode = new_inode(sb); + struct inode *inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); - ret = hostfs_read_inode(inode); - if (ret < 0) { - iput(inode); - return ERR_PTR(ret); - } return inode; } @@ -979,13 +940,23 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sprintf(host_root_path, "%s/%s", root_ino, req_root); - root_inode = hostfs_iget(sb); - if (IS_ERR(root_inode)) { - err = PTR_ERR(root_inode); + root_inode = new_inode(sb); + if (!root_inode) goto out; - } - err = init_inode(root_inode, NULL); + root_inode->i_op = &hostfs_dir_iops; + root_inode->i_fop = &hostfs_dir_fops; + + if (file_type(host_root_path, NULL, NULL) == OS_TYPE_SYMLINK) { + char *name = follow_link(host_root_path); + if (IS_ERR(name)) + err = PTR_ERR(name); + else + err = read_name(root_inode, name); + kfree(name); + } else { + err = read_name(root_inode, host_root_path); + } if (err) goto out_put; @@ -994,14 +965,6 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) if (sb->s_root == NULL) goto out_put; - err = hostfs_read_inode(root_inode); - if (err) { - /* No iput in this case because the dput does that for us */ - dput(sb->s_root); - sb->s_root = NULL; - goto out; - } - return 0; out_put: -- cgit v1.2.3-70-g09d2 From 5e2df28cc62fdc3f4900de23f4ec69e6312f78a4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 19:38:18 -0400 Subject: hostfs: pass pathname to init_inode() We will calculate it in all callers anyway, so there's no need to duplicate that inside. 
Moreover, that way we lose all failure exits in init_inode(), so it doesn't need to return anything. Signed-off-by: Al Viro --- fs/hostfs/hostfs_kern.c | 45 +++++++++++++++------------------------------ 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 25d79298a98..5a77ed3dfd7 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -485,25 +485,16 @@ static const struct address_space_operations hostfs_aops = { .write_end = hostfs_write_end, }; -static int init_inode(struct inode *inode, struct dentry *dentry) +static void init_inode(struct inode *inode, char *path) { - char *name; - int type, err = -ENOMEM; + int type; int maj, min; dev_t rdev = 0; - if (dentry) { - name = dentry_name(dentry, 0); - if (name == NULL) - goto out; - type = file_type(name, &maj, &min); - /* Reencode maj and min with the kernel encoding.*/ - rdev = MKDEV(maj, min); - kfree(name); - } - else type = OS_TYPE_DIR; + type = file_type(path, &maj, &min); + /* Reencode maj and min with the kernel encoding.*/ + rdev = MKDEV(maj, min); - err = 0; if (type == OS_TYPE_SYMLINK) inode->i_op = &page_symlink_inode_operations; else if (type == OS_TYPE_DIR) @@ -531,8 +522,6 @@ static int init_inode(struct inode *inode, struct dentry *dentry) init_special_inode(inode, S_IFSOCK, 0); break; } - out: - return err; } int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, @@ -548,10 +537,6 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, goto out; } - error = init_inode(inode, dentry); - if (error) - goto out_put; - error = -ENOMEM; name = dentry_name(dentry, 0); if (name == NULL) @@ -561,9 +546,12 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR, mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP, mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); - if (fd < 0) + if (fd < 0) { error = fd; - else error = read_name(inode, 
name); + } else { + error = read_name(inode, name); + init_inode(inode, name); + } kfree(name); if (error) @@ -593,16 +581,14 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, goto out; } - err = init_inode(inode, dentry); - if (err) - goto out_put; - err = -ENOMEM; name = dentry_name(dentry, 0); if (name == NULL) goto out_put; err = read_name(inode, name); + init_inode(inode, name); + kfree(name); if (err == -ENOENT) { iput(inode); @@ -717,10 +703,6 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) goto out; } - err = init_inode(inode, dentry); - if (err) - goto out_put; - err = -ENOMEM; name = dentry_name(dentry, 0); if (name == NULL) @@ -732,6 +714,9 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) goto out_free; err = read_name(inode, name); + init_inode(inode, name); + if (err) + goto out_put; kfree(name); if (err) goto out_put; -- cgit v1.2.3-70-g09d2 From 39b743c6199a317ffac67fcae1dd05be3142633a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 20:08:56 -0400 Subject: switch stat_file() to passing a single struct rather than fsckloads of pointers Signed-off-by: Al Viro --- fs/hostfs/hostfs.h | 20 +++++++++---- fs/hostfs/hostfs_kern.c | 31 ++++++++------------ fs/hostfs/hostfs_user.c | 75 ++++++++++++++++++++----------------------------- 3 files changed, 58 insertions(+), 68 deletions(-) diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 2f34f8f2134..3a52edef994 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -53,11 +53,21 @@ struct hostfs_iattr { struct timespec ia_ctime; }; -extern int stat_file(const char *path, unsigned long long *inode_out, - int *mode_out, int *nlink_out, int *uid_out, int *gid_out, - unsigned long long *size_out, struct timespec *atime_out, - struct timespec *mtime_out, struct timespec *ctime_out, - int *blksize_out, unsigned long long *blocks_out, int fd); +struct hostfs_stat { + unsigned long long ino; + unsigned 
int mode; + unsigned int nlink; + unsigned int uid; + unsigned int gid; + unsigned long long size; + struct timespec atime, mtime, ctime; + unsigned int blksize; + unsigned long long blocks; + unsigned int maj; + unsigned int min; +}; + +extern int stat_file(const char *path, struct hostfs_stat *p, int fd); extern int access_file(char *path, int r, int w, int x); extern int open_file(char *path, int r, int w, int append); extern int file_type(const char *path, int *maj, int *min); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 5a77ed3dfd7..420a826ae0f 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -131,28 +131,21 @@ static char *inode_name(struct inode *ino, int extra) static int read_name(struct inode *ino, char *name) { - /* - * The non-int inode fields are copied into ints by stat_file and - * then copied into the inode because passing the actual pointers - * in and having them treated as int * breaks on big-endian machines - */ - int err; - int i_mode, i_nlink, i_blksize; - unsigned long long i_size; - unsigned long long i_ino; - unsigned long long i_blocks; - - err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid, - &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime, - &ino->i_ctime, &i_blksize, &i_blocks, -1); + struct hostfs_stat st; + int err = stat_file(name, &st, -1); if (err) return err; - ino->i_ino = i_ino; - ino->i_mode = i_mode; - ino->i_nlink = i_nlink; - ino->i_size = i_size; - ino->i_blocks = i_blocks; + ino->i_ino = st.ino; + ino->i_mode = st.mode; + ino->i_nlink = st.nlink; + ino->i_uid = st.uid; + ino->i_gid = st.gid; + ino->i_atime = st.atime; + ino->i_mtime = st.mtime; + ino->i_ctime = st.ctime; + ino->i_size = st.size; + ino->i_blocks = st.blocks; return 0; } diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 4b8c666ba28..701d454a679 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -19,11 +19,27 @@ #include "user.h" #include -int stat_file(const char 
*path, unsigned long long *inode_out, int *mode_out, - int *nlink_out, int *uid_out, int *gid_out, - unsigned long long *size_out, struct timespec *atime_out, - struct timespec *mtime_out, struct timespec *ctime_out, - int *blksize_out, unsigned long long *blocks_out, int fd) +static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p) +{ + p->ino = buf->st_ino; + p->mode = buf->st_mode; + p->nlink = buf->st_nlink; + p->uid = buf->st_uid; + p->gid = buf->st_gid; + p->size = buf->st_size; + p->atime.tv_sec = buf->st_atime; + p->atime.tv_nsec = 0; + p->ctime.tv_sec = buf->st_ctime; + p->ctime.tv_nsec = 0; + p->mtime.tv_sec = buf->st_mtime; + p->mtime.tv_nsec = 0; + p->blksize = buf->st_blksize; + p->blocks = buf->st_blocks; + p->maj = os_major(buf->st_rdev); + p->min = os_minor(buf->st_rdev); +} + +int stat_file(const char *path, struct hostfs_stat *p, int fd) { struct stat64 buf; @@ -33,35 +49,7 @@ int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, } else if (lstat64(path, &buf) < 0) { return -errno; } - - if (inode_out != NULL) - *inode_out = buf.st_ino; - if (mode_out != NULL) - *mode_out = buf.st_mode; - if (nlink_out != NULL) - *nlink_out = buf.st_nlink; - if (uid_out != NULL) - *uid_out = buf.st_uid; - if (gid_out != NULL) - *gid_out = buf.st_gid; - if (size_out != NULL) - *size_out = buf.st_size; - if (atime_out != NULL) { - atime_out->tv_sec = buf.st_atime; - atime_out->tv_nsec = 0; - } - if (mtime_out != NULL) { - mtime_out->tv_sec = buf.st_mtime; - mtime_out->tv_nsec = 0; - } - if (ctime_out != NULL) { - ctime_out->tv_sec = buf.st_ctime; - ctime_out->tv_nsec = 0; - } - if (blksize_out != NULL) - *blksize_out = buf.st_blksize; - if (blocks_out != NULL) - *blocks_out = buf.st_blocks; + stat64_to_hostfs(&buf, p); return 0; } @@ -235,8 +223,8 @@ int file_create(char *name, int ur, int uw, int ux, int gr, int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) { + struct hostfs_stat st; struct timeval 
times[2]; - struct timespec atime_ts, mtime_ts; int err, ma; if (attrs->ia_valid & HOSTFS_ATTR_MODE) { @@ -279,15 +267,14 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) */ ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET); if (attrs->ia_valid & ma) { - err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, - &atime_ts, &mtime_ts, NULL, NULL, NULL, fd); + err = stat_file(file, &st, fd); if (err != 0) return err; - times[0].tv_sec = atime_ts.tv_sec; - times[0].tv_usec = atime_ts.tv_nsec / 1000; - times[1].tv_sec = mtime_ts.tv_sec; - times[1].tv_usec = mtime_ts.tv_nsec / 1000; + times[0].tv_sec = st.atime.tv_sec; + times[0].tv_usec = st.atime.tv_nsec / 1000; + times[1].tv_sec = st.mtime.tv_sec; + times[1].tv_usec = st.mtime.tv_nsec / 1000; if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) { times[0].tv_sec = attrs->ia_atime.tv_sec; @@ -308,9 +295,9 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) /* Note: ctime is not handled */ if (attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)) { - err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, - &attrs->ia_atime, &attrs->ia_mtime, NULL, - NULL, NULL, fd); + err = stat_file(file, &st, fd); + attrs->ia_atime = st.atime; + attrs->ia_mtime = st.mtime; if (err != 0) return err; } -- cgit v1.2.3-70-g09d2 From 4754b825571a6f2f7655245e420e8e486c4458f6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 20:33:12 -0400 Subject: hostfs: get rid of file_type(), fold init_inode() Signed-off-by: Al Viro --- fs/hostfs/hostfs.h | 1 - fs/hostfs/hostfs_kern.c | 107 ++++++++++++++++++++---------------------------- fs/hostfs/hostfs_user.c | 30 -------------- 3 files changed, 45 insertions(+), 93 deletions(-) diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 3a52edef994..ea87e224ed9 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -70,7 +70,6 @@ struct hostfs_stat { extern int stat_file(const char *path, struct hostfs_stat *p, int fd); extern int 
access_file(char *path, int r, int w, int x); extern int open_file(char *path, int r, int w, int append); -extern int file_type(const char *path, int *maj, int *min); extern void *open_dir(char *path, int *err_out); extern char *read_dir(void *stream, unsigned long long *pos, unsigned long long *ino_out, int *len_out); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 420a826ae0f..b29a2b878f4 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -129,26 +129,6 @@ static char *inode_name(struct inode *ino, int extra) return dentry_name(dentry, extra); } -static int read_name(struct inode *ino, char *name) -{ - struct hostfs_stat st; - int err = stat_file(name, &st, -1); - if (err) - return err; - - ino->i_ino = st.ino; - ino->i_mode = st.mode; - ino->i_nlink = st.nlink; - ino->i_uid = st.uid; - ino->i_gid = st.gid; - ino->i_atime = st.atime; - ino->i_mtime = st.mtime; - ino->i_ctime = st.ctime; - ino->i_size = st.size; - ino->i_blocks = st.blocks; - return 0; -} - static char *follow_link(char *link) { int len, n; @@ -478,43 +458,51 @@ static const struct address_space_operations hostfs_aops = { .write_end = hostfs_write_end, }; -static void init_inode(struct inode *inode, char *path) +static int read_name(struct inode *ino, char *name) { - int type; - int maj, min; - dev_t rdev = 0; + dev_t rdev; + struct hostfs_stat st; + int err = stat_file(name, &st, -1); + if (err) + return err; - type = file_type(path, &maj, &min); /* Reencode maj and min with the kernel encoding.*/ - rdev = MKDEV(maj, min); + rdev = MKDEV(st.maj, st.min); - if (type == OS_TYPE_SYMLINK) - inode->i_op = &page_symlink_inode_operations; - else if (type == OS_TYPE_DIR) - inode->i_op = &hostfs_dir_iops; - else inode->i_op = &hostfs_iops; - - if (type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops; - else inode->i_fop = &hostfs_file_fops; - - if (type == OS_TYPE_SYMLINK) - inode->i_mapping->a_ops = &hostfs_link_aops; - else inode->i_mapping->a_ops = &hostfs_aops; 
- - switch (type) { - case OS_TYPE_CHARDEV: - init_special_inode(inode, S_IFCHR, rdev); - break; - case OS_TYPE_BLOCKDEV: - init_special_inode(inode, S_IFBLK, rdev); + switch (st.mode & S_IFMT) { + case S_IFLNK: + ino->i_op = &page_symlink_inode_operations; + ino->i_mapping->a_ops = &hostfs_link_aops; break; - case OS_TYPE_FIFO: - init_special_inode(inode, S_IFIFO, 0); + case S_IFDIR: + ino->i_op = &hostfs_dir_iops; + ino->i_fop = &hostfs_dir_fops; break; - case OS_TYPE_SOCK: - init_special_inode(inode, S_IFSOCK, 0); + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + init_special_inode(ino, st.mode & S_IFMT, rdev); + ino->i_op = &hostfs_iops; break; + + default: + ino->i_op = &hostfs_iops; + ino->i_fop = &hostfs_file_fops; + ino->i_mapping->a_ops = &hostfs_aops; } + + ino->i_ino = st.ino; + ino->i_mode = st.mode; + ino->i_nlink = st.nlink; + ino->i_uid = st.uid; + ino->i_gid = st.gid; + ino->i_atime = st.atime; + ino->i_mtime = st.mtime; + ino->i_ctime = st.ctime; + ino->i_size = st.size; + ino->i_blocks = st.blocks; + return 0; } int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, @@ -539,12 +527,10 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR, mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP, mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); - if (fd < 0) { + if (fd < 0) error = fd; - } else { + else error = read_name(inode, name); - init_inode(inode, name); - } kfree(name); if (error) @@ -580,7 +566,6 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, goto out_put; err = read_name(inode, name); - init_inode(inode, name); kfree(name); if (err == -ENOENT) { @@ -707,7 +692,6 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) goto out_free; err = read_name(inode, name); - init_inode(inode, name); if (err) goto out_put; kfree(name); @@ -922,21 +906,20 @@ static int hostfs_fill_sb_common(struct super_block 
*sb, void *d, int silent) if (!root_inode) goto out; - root_inode->i_op = &hostfs_dir_iops; - root_inode->i_fop = &hostfs_dir_fops; + err = read_name(root_inode, host_root_path); + if (err) + goto out_put; - if (file_type(host_root_path, NULL, NULL) == OS_TYPE_SYMLINK) { + if (S_ISLNK(root_inode->i_mode)) { char *name = follow_link(host_root_path); if (IS_ERR(name)) err = PTR_ERR(name); else err = read_name(root_inode, name); kfree(name); - } else { - err = read_name(root_inode, host_root_path); + if (err) + goto out_put; } - if (err) - goto out_put; err = -ENOMEM; sb->s_root = d_alloc_root(root_inode); diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 701d454a679..91ebfcefa40 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -53,36 +53,6 @@ int stat_file(const char *path, struct hostfs_stat *p, int fd) return 0; } -int file_type(const char *path, int *maj, int *min) -{ - struct stat64 buf; - - if (lstat64(path, &buf) < 0) - return -errno; - /* - * We cannot pass rdev as is because glibc and the kernel disagree - * about its definition. 
- */ - if (maj != NULL) - *maj = os_major(buf.st_rdev); - if (min != NULL) - *min = os_minor(buf.st_rdev); - - if (S_ISDIR(buf.st_mode)) - return OS_TYPE_DIR; - else if (S_ISLNK(buf.st_mode)) - return OS_TYPE_SYMLINK; - else if (S_ISCHR(buf.st_mode)) - return OS_TYPE_CHARDEV; - else if (S_ISBLK(buf.st_mode)) - return OS_TYPE_BLOCKDEV; - else if (S_ISFIFO(buf.st_mode)) - return OS_TYPE_FIFO; - else if (S_ISSOCK(buf.st_mode)) - return OS_TYPE_SOCK; - else return OS_TYPE_FILE; -} - int access_file(char *path, int r, int w, int x) { int mode = 0; -- cgit v1.2.3-70-g09d2 From c5322220eb91b9e56ac7b69eb690d9d20fac5725 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 20:42:10 -0400 Subject: hostfs: get rid of inode_dentry_name() it's equivalent to dentry_name() anyway Signed-off-by: Al Viro --- fs/hostfs/hostfs_kern.c | 55 ++++++++++++++++++------------------------------- 1 file changed, 20 insertions(+), 35 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index b29a2b878f4..3841fb1ca5a 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -89,7 +89,7 @@ __uml_setup("hostfs=", hostfs_args, ); #endif -static char *dentry_name(struct dentry *dentry, int extra) +static char *dentry_name(struct dentry *dentry) { struct dentry *parent; char *root, *name; @@ -104,7 +104,7 @@ static char *dentry_name(struct dentry *dentry, int extra) root = parent->d_sb->s_fs_info; len += strlen(root); - name = kmalloc(len + extra + 1, GFP_KERNEL); + name = kmalloc(len + 1, GFP_KERNEL); if (name == NULL) return NULL; @@ -121,12 +121,12 @@ static char *dentry_name(struct dentry *dentry, int extra) return name; } -static char *inode_name(struct inode *ino, int extra) +static char *inode_name(struct inode *ino) { struct dentry *dentry; dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); - return dentry_name(dentry, extra); + return dentry_name(dentry); } static char *follow_link(char *link) @@ -267,7 +267,7 @@ int 
hostfs_readdir(struct file *file, void *ent, filldir_t filldir) unsigned long long next, ino; int error, len; - name = dentry_name(file->f_path.dentry, 0); + name = dentry_name(file->f_path.dentry); if (name == NULL) return -ENOMEM; dir = open_dir(name, &error); @@ -312,7 +312,7 @@ int hostfs_file_open(struct inode *ino, struct file *file) if (w) r = 1; - name = dentry_name(file->f_path.dentry, 0); + name = dentry_name(file->f_path.dentry); if (name == NULL) return -ENOMEM; @@ -519,7 +519,7 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, } error = -ENOMEM; - name = dentry_name(dentry, 0); + name = dentry_name(dentry); if (name == NULL) goto out_put; @@ -561,7 +561,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, } err = -ENOMEM; - name = dentry_name(dentry, 0); + name = dentry_name(dentry); if (name == NULL) goto out_put; @@ -585,29 +585,14 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, return ERR_PTR(err); } -static char *inode_dentry_name(struct inode *ino, struct dentry *dentry) -{ - char *file; - int len; - - file = inode_name(ino, dentry->d_name.len + 1); - if (file == NULL) - return NULL; - strcat(file, "/"); - len = strlen(file); - strncat(file, dentry->d_name.name, dentry->d_name.len); - file[len + dentry->d_name.len] = '\0'; - return file; -} - int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) { char *from_name, *to_name; int err; - if ((from_name = inode_dentry_name(ino, from)) == NULL) + if ((from_name = dentry_name(from)) == NULL) return -ENOMEM; - to_name = dentry_name(to, 0); + to_name = dentry_name(to); if (to_name == NULL) { kfree(from_name); return -ENOMEM; @@ -623,7 +608,7 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry) char *file; int err; - if ((file = inode_dentry_name(ino, dentry)) == NULL) + if ((file = dentry_name(dentry)) == NULL) return -ENOMEM; if (append) return -EPERM; @@ -638,7 +623,7 @@ int hostfs_symlink(struct inode 
*ino, struct dentry *dentry, const char *to) char *file; int err; - if ((file = inode_dentry_name(ino, dentry)) == NULL) + if ((file = dentry_name(dentry)) == NULL) return -ENOMEM; err = make_symlink(file, to); kfree(file); @@ -650,7 +635,7 @@ int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) char *file; int err; - if ((file = inode_dentry_name(ino, dentry)) == NULL) + if ((file = dentry_name(dentry)) == NULL) return -ENOMEM; err = do_mkdir(file, mode); kfree(file); @@ -662,7 +647,7 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry) char *file; int err; - if ((file = inode_dentry_name(ino, dentry)) == NULL) + if ((file = dentry_name(dentry)) == NULL) return -ENOMEM; err = do_rmdir(file); kfree(file); @@ -682,7 +667,7 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) } err = -ENOMEM; - name = dentry_name(dentry, 0); + name = dentry_name(dentry); if (name == NULL) goto out_put; @@ -715,9 +700,9 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, char *from_name, *to_name; int err; - if ((from_name = inode_dentry_name(from_ino, from)) == NULL) + if ((from_name = dentry_name(from)) == NULL) return -ENOMEM; - if ((to_name = inode_dentry_name(to_ino, to)) == NULL) { + if ((to_name = dentry_name(to)) == NULL) { kfree(from_name); return -ENOMEM; } @@ -735,7 +720,7 @@ int hostfs_permission(struct inode *ino, int desired) if (desired & MAY_READ) r = 1; if (desired & MAY_WRITE) w = 1; if (desired & MAY_EXEC) x = 1; - name = inode_name(ino, 0); + name = inode_name(ino); if (name == NULL) return -ENOMEM; @@ -801,7 +786,7 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_MTIME_SET) { attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; } - name = dentry_name(dentry, 0); + name = dentry_name(dentry); if (name == NULL) return -ENOMEM; err = set_attr(name, &attrs, fd); @@ -856,7 +841,7 @@ int hostfs_link_readpage(struct file *file, struct page *page) int err; buffer = 
kmap(page); - name = inode_name(page->mapping->host, 0); + name = inode_name(page->mapping->host); if (name == NULL) return -ENOMEM; err = hostfs_do_readlink(name, buffer, PAGE_CACHE_SIZE); -- cgit v1.2.3-70-g09d2 From d0352d3ed722b134dacc21836c1763e7e3523662 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 21:51:16 -0400 Subject: hostfs: sanitize symlinks Signed-off-by: Al Viro --- fs/hostfs/hostfs_kern.c | 61 ++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 3841fb1ca5a..10bb71b1548 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "hostfs.h" #include "init.h" #include "kern.h" @@ -48,7 +49,7 @@ static int append = 0; static const struct inode_operations hostfs_iops; static const struct inode_operations hostfs_dir_iops; -static const struct address_space_operations hostfs_link_aops; +static const struct inode_operations hostfs_link_iops; #ifndef MODULE static int __init hostfs_args(char *options, int *add) @@ -471,8 +472,7 @@ static int read_name(struct inode *ino, char *name) switch (st.mode & S_IFMT) { case S_IFLNK: - ino->i_op = &page_symlink_inode_operations; - ino->i_mapping->a_ops = &hostfs_link_aops; + ino->i_op = &hostfs_link_iops; break; case S_IFDIR: ino->i_op = &hostfs_dir_iops; @@ -835,32 +835,41 @@ static const struct inode_operations hostfs_dir_iops = { .setattr = hostfs_setattr, }; -int hostfs_link_readpage(struct file *file, struct page *page) -{ - char *buffer, *name; - int err; - - buffer = kmap(page); - name = inode_name(page->mapping->host); - if (name == NULL) - return -ENOMEM; - err = hostfs_do_readlink(name, buffer, PAGE_CACHE_SIZE); - kfree(name); - if (err == PAGE_CACHE_SIZE) - err = -E2BIG; - else if (err > 0) { - flush_dcache_page(page); - SetPageUptodate(page); - if (PageError(page)) ClearPageError(page); - err = 0; +static 
void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + char *link = __getname(); + if (link) { + char *path = dentry_name(dentry); + int err = -ENOMEM; + if (path) { + int err = hostfs_do_readlink(path, link, PATH_MAX); + if (err == PATH_MAX) + err = -E2BIG; + kfree(path); + } + if (err < 0) { + __putname(link); + link = ERR_PTR(err); + } + } else { + link = ERR_PTR(-ENOMEM); } - kunmap(page); - unlock_page(page); - return err; + + nd_set_link(nd, link); + return NULL; +} + +static void hostfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + __putname(s); } -static const struct address_space_operations hostfs_link_aops = { - .readpage = hostfs_link_readpage, +static const struct inode_operations hostfs_link_iops = { + .readlink = generic_readlink, + .follow_link = hostfs_follow_link, + .put_link = hostfs_put_link, }; static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) -- cgit v1.2.3-70-g09d2 From c103135c14e03fc9a9e5f0adc01df9ad272cf2a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 22:31:14 -0400 Subject: new helper: __dentry_path() builds path relative to fs root, called under dcache_lock, doesn't append any nonsense to unlinked ones. Signed-off-by: Al Viro --- fs/dcache.c | 27 ++++++++++++++++++++++----- include/linux/dcache.h | 1 + 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 86d4db15473..caf08574982 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2049,16 +2049,12 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, /* * Write full pathname from the root of the filesystem into the buffer. 
*/ -char *dentry_path(struct dentry *dentry, char *buf, int buflen) +char *__dentry_path(struct dentry *dentry, char *buf, int buflen) { char *end = buf + buflen; char *retval; - spin_lock(&dcache_lock); prepend(&end, &buflen, "\0", 1); - if (d_unlinked(dentry) && - (prepend(&end, &buflen, "//deleted", 9) != 0)) - goto Elong; if (buflen < 1) goto Elong; /* Get '/' right */ @@ -2076,7 +2072,28 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) retval = end; dentry = parent; } + return retval; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} +EXPORT_SYMBOL(__dentry_path); + +char *dentry_path(struct dentry *dentry, char *buf, int buflen) +{ + char *p = NULL; + char *retval; + + spin_lock(&dcache_lock); + if (d_unlinked(dentry)) { + p = buf + buflen; + if (prepend(&p, &buflen, "//deleted", 10) != 0) + goto Elong; + buflen++; + } + retval = __dentry_path(dentry, buf, buflen); spin_unlock(&dcache_lock); + if (!IS_ERR(retval) && p) + *p = '/'; /* restore '/' overriden with '\0' */ return retval; Elong: spin_unlock(&dcache_lock); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index eebb617c17d..d23be0386e2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -315,6 +315,7 @@ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *__d_path(const struct path *path, struct path *root, char *, int); extern char *d_path(const struct path *, char *, int); +extern char *__dentry_path(struct dentry *, char *, int); extern char *dentry_path(struct dentry *, char *, int); /* Allocation counts.. */ -- cgit v1.2.3-70-g09d2 From e9193059b1b3733695d5b80e667778311695aa73 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 23:16:34 -0400 Subject: hostfs: fix races in dentry_name() and inode_name() calculating size, then doing allocation, then filling the path is a Bad Idea(tm), since the ancestors can be renamed, leading to buffer overrun. 
Signed-off-by: Al Viro --- fs/hostfs/hostfs_kern.c | 106 +++++++++++++++++++++++++++--------------------- 1 file changed, 60 insertions(+), 46 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 10bb71b1548..79783a0b2f4 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -90,44 +90,58 @@ __uml_setup("hostfs=", hostfs_args, ); #endif -static char *dentry_name(struct dentry *dentry) +static char *__dentry_name(struct dentry *dentry, char *name) { - struct dentry *parent; - char *root, *name; - int len; + char *p = __dentry_path(dentry, name, PATH_MAX); + char *root; + size_t len; - len = 0; - parent = dentry; - while (parent->d_parent != parent) { - len += parent->d_name.len + 1; - parent = parent->d_parent; - } + spin_unlock(&dcache_lock); - root = parent->d_sb->s_fs_info; - len += strlen(root); - name = kmalloc(len + 1, GFP_KERNEL); - if (name == NULL) + root = dentry->d_sb->s_fs_info; + len = strlen(root); + if (IS_ERR(p)) { + __putname(name); return NULL; - - name[len] = '\0'; - parent = dentry; - while (parent->d_parent != parent) { - len -= parent->d_name.len + 1; - name[len] = '/'; - strncpy(&name[len + 1], parent->d_name.name, - parent->d_name.len); - parent = parent->d_parent; } - strncpy(name, root, strlen(root)); + strncpy(name, root, PATH_MAX); + if (len > p - name) { + __putname(name); + return NULL; + } + if (p > name + len) { + char *s = name + len; + while ((*s++ = *p++) != '\0') + ; + } return name; } +static char *dentry_name(struct dentry *dentry) +{ + char *name = __getname(); + if (!name) + return NULL; + + spin_lock(&dcache_lock); + return __dentry_name(dentry, name); /* will unlock */ +} + static char *inode_name(struct inode *ino) { struct dentry *dentry; + char *name = __getname(); + if (!name) + return NULL; - dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); - return dentry_name(dentry); + spin_lock(&dcache_lock); + if (list_empty(&ino->i_dentry)) { + spin_unlock(&dcache_lock); 
+ __putname(name); + return NULL; + } + dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias); + return __dentry_name(dentry, name); /* will unlock */ } static char *follow_link(char *link) @@ -272,7 +286,7 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) if (name == NULL) return -ENOMEM; dir = open_dir(name, &error); - kfree(name); + __putname(name); if (dir == NULL) return -error; next = file->f_pos; @@ -318,7 +332,7 @@ int hostfs_file_open(struct inode *ino, struct file *file) return -ENOMEM; fd = open_file(name, r, w, append); - kfree(name); + __putname(name); if (fd < 0) return fd; FILE_HOSTFS_I(file)->fd = fd; @@ -532,7 +546,7 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, else error = read_name(inode, name); - kfree(name); + __putname(name); if (error) goto out_put; @@ -567,7 +581,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, err = read_name(inode, name); - kfree(name); + __putname(name); if (err == -ENOENT) { iput(inode); inode = NULL; @@ -594,12 +608,12 @@ int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) return -ENOMEM; to_name = dentry_name(to); if (to_name == NULL) { - kfree(from_name); + __putname(from_name); return -ENOMEM; } err = link_file(to_name, from_name); - kfree(from_name); - kfree(to_name); + __putname(from_name); + __putname(to_name); return err; } @@ -614,7 +628,7 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry) return -EPERM; err = unlink_file(file); - kfree(file); + __putname(file); return err; } @@ -626,7 +640,7 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) if ((file = dentry_name(dentry)) == NULL) return -ENOMEM; err = make_symlink(file, to); - kfree(file); + __putname(file); return err; } @@ -638,7 +652,7 @@ int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) if ((file = dentry_name(dentry)) == NULL) return -ENOMEM; err = do_mkdir(file, mode); - kfree(file); 
+ __putname(file); return err; } @@ -650,7 +664,7 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry) if ((file = dentry_name(dentry)) == NULL) return -ENOMEM; err = do_rmdir(file); - kfree(file); + __putname(file); return err; } @@ -673,13 +687,13 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) init_special_inode(inode, mode, dev); err = do_mknod(name, mode, MAJOR(dev), MINOR(dev)); - if (err) + if (!err) goto out_free; err = read_name(inode, name); + __putname(name); if (err) goto out_put; - kfree(name); if (err) goto out_put; @@ -687,7 +701,7 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) return 0; out_free: - kfree(name); + __putname(name); out_put: iput(inode); out: @@ -703,12 +717,12 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, if ((from_name = dentry_name(from)) == NULL) return -ENOMEM; if ((to_name = dentry_name(to)) == NULL) { - kfree(from_name); + __putname(from_name); return -ENOMEM; } err = rename_file(from_name, to_name); - kfree(from_name); - kfree(to_name); + __putname(from_name); + __putname(to_name); return err; } @@ -729,7 +743,7 @@ int hostfs_permission(struct inode *ino, int desired) err = 0; else err = access_file(name, r, w, x); - kfree(name); + __putname(name); if (!err) err = generic_permission(ino, desired, NULL); return err; @@ -790,7 +804,7 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr) if (name == NULL) return -ENOMEM; err = set_attr(name, &attrs, fd); - kfree(name); + __putname(name); if (err) return err; @@ -845,7 +859,7 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd) int err = hostfs_do_readlink(path, link, PATH_MAX); if (err == PATH_MAX) err = -E2BIG; - kfree(path); + __putname(path); } if (err < 0) { __putname(link); -- cgit v1.2.3-70-g09d2 From f8d7e1877e5121841bc9a4d284a04dbc13f45bea Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 23:19:04 -0400 Subject: leak in 
hostfs_unlink() Signed-off-by: Al Viro --- fs/hostfs/hostfs_kern.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 79783a0b2f4..8130ce93a06 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -622,11 +622,12 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry) char *file; int err; - if ((file = dentry_name(dentry)) == NULL) - return -ENOMEM; if (append) return -EPERM; + if ((file = dentry_name(dentry)) == NULL) + return -ENOMEM; + err = unlink_file(file); __putname(file); return err; -- cgit v1.2.3-70-g09d2 From f8ad850f11e11d10e7de1a16ca53cb193afc9313 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jun 2010 23:49:18 -0400 Subject: try to get rid of races in hostfs open() In case of mode mismatch, do *not* blindly close the descriptor another openers might be using right now. Open the underlying file with currently sufficient mode, then * if current mode has grown so that it's sufficient for us now, just close our new fd * if current mode has grown and our fd is *not* enough to cover it, close and repeat. * otherwise, install our fd if the file hadn't been opened at all or dup2() our fd over the current one (and close our fd). Critical section is protected by mutex; yes, system-wide. All we do under it is a bunch of comparison and maybe an overwriting dup2() on host. 
Signed-off-by: Al Viro --- fs/hostfs/hostfs.h | 1 + fs/hostfs/hostfs_kern.c | 43 +++++++++++++++++++++++++++++++------------ fs/hostfs/hostfs_user.c | 5 +++++ 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index ea87e224ed9..6bbd75c5589 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -74,6 +74,7 @@ extern void *open_dir(char *path, int *err_out); extern char *read_dir(void *stream, unsigned long long *pos, unsigned long long *ino_out, int *len_out); extern void close_file(void *stream); +extern int replace_file(int oldfd, int fd); extern void close_dir(void *stream); extern int read_file(int fd, unsigned long long *offset, char *buf, int len); extern int write_file(int fd, unsigned long long *offset, const char *buf, diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 8130ce93a06..dd1e55535a4 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -302,27 +302,22 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) int hostfs_file_open(struct inode *ino, struct file *file) { + static DEFINE_MUTEX(open_mutex); char *name; fmode_t mode = 0; + int err; int r = 0, w = 0, fd; mode = file->f_mode & (FMODE_READ | FMODE_WRITE); if ((mode & HOSTFS_I(ino)->mode) == mode) return 0; - /* - * The file may already have been opened, but with the wrong access, - * so this resets things and reopens the file with the new access. 
- */ - if (HOSTFS_I(ino)->fd != -1) { - close_file(&HOSTFS_I(ino)->fd); - HOSTFS_I(ino)->fd = -1; - } + mode |= HOSTFS_I(ino)->mode; - HOSTFS_I(ino)->mode |= mode; - if (HOSTFS_I(ino)->mode & FMODE_READ) +retry: + if (mode & FMODE_READ) r = 1; - if (HOSTFS_I(ino)->mode & FMODE_WRITE) + if (mode & FMODE_WRITE) w = 1; if (w) r = 1; @@ -335,7 +330,31 @@ int hostfs_file_open(struct inode *ino, struct file *file) __putname(name); if (fd < 0) return fd; - FILE_HOSTFS_I(file)->fd = fd; + + mutex_lock(&open_mutex); + /* somebody else had handled it first? */ + if ((mode & HOSTFS_I(ino)->mode) == mode) { + mutex_unlock(&open_mutex); + return 0; + } + if ((mode | HOSTFS_I(ino)->mode) != mode) { + mode |= HOSTFS_I(ino)->mode; + mutex_unlock(&open_mutex); + close_file(&fd); + goto retry; + } + if (HOSTFS_I(ino)->fd == -1) { + HOSTFS_I(ino)->fd = fd; + } else { + err = replace_file(fd, HOSTFS_I(ino)->fd); + close_file(&fd); + if (err < 0) { + mutex_unlock(&open_mutex); + return err; + } + } + HOSTFS_I(ino)->mode = mode; + mutex_unlock(&open_mutex); return 0; } diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 91ebfcefa40..6777aa06ce2 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -160,6 +160,11 @@ int fsync_file(int fd, int datasync) return 0; } +int replace_file(int oldfd, int fd) +{ + return dup2(oldfd, fd); +} + void close_file(void *stream) { close(*((int *) stream)); -- cgit v1.2.3-70-g09d2 From 33b0daaa5557e9dadf4c27407fae7d316bab5686 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 00:12:50 -0400 Subject: switch hppfs to ->evict_inode() Signed-off-by: Al Viro --- fs/hppfs/hppfs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 943ce751ce1..7b027720d82 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -624,12 +624,11 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb) return &hi->vfs_inode; } -void hppfs_delete_inode(struct inode 
*ino) +void hppfs_evict_inode(struct inode *ino) { + end_writeback(ino); dput(HPPFS_I(ino)->proc_dentry); mntput(ino->i_sb->s_fs_info); - - clear_inode(ino); } static void hppfs_destroy_inode(struct inode *inode) @@ -640,7 +639,7 @@ static void hppfs_destroy_inode(struct inode *inode) static const struct super_operations hppfs_sbops = { .alloc_inode = hppfs_alloc_inode, .destroy_inode = hppfs_destroy_inode, - .delete_inode = hppfs_delete_inode, + .evict_inode = hppfs_evict_inode, .statfs = hppfs_statfs, }; -- cgit v1.2.3-70-g09d2 From ea544009206baa03d606161656618900260b48e5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 00:18:40 -0400 Subject: switch hpfs to ->evict_inode() Signed-off-by: Al Viro --- fs/hpfs/hpfs_fn.h | 2 +- fs/hpfs/inode.c | 12 +++++++----- fs/hpfs/super.c | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 75f9d432485..b59eac0232a 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -281,7 +281,7 @@ void hpfs_write_inode(struct inode *); void hpfs_write_inode_nolock(struct inode *); int hpfs_setattr(struct dentry *, struct iattr *); void hpfs_write_if_changed(struct inode *); -void hpfs_delete_inode(struct inode *); +void hpfs_evict_inode(struct inode *); /* map.c */ diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 3f3b397fd4e..56f0da1cfd1 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -302,11 +302,13 @@ void hpfs_write_if_changed(struct inode *inode) hpfs_write_inode(inode); } -void hpfs_delete_inode(struct inode *inode) +void hpfs_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - lock_kernel(); - hpfs_remove_fnode(inode->i_sb, inode->i_ino); - unlock_kernel(); - clear_inode(inode); + end_writeback(inode); + if (!inode->i_nlink) { + lock_kernel(); + hpfs_remove_fnode(inode->i_sb, inode->i_ino); + unlock_kernel(); + } } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index aa53842c599..2607010be2f 100644 --- a/fs/hpfs/super.c +++ 
b/fs/hpfs/super.c @@ -450,7 +450,7 @@ static const struct super_operations hpfs_sops = { .alloc_inode = hpfs_alloc_inode, .destroy_inode = hpfs_destroy_inode, - .delete_inode = hpfs_delete_inode, + .evict_inode = hpfs_evict_inode, .put_super = hpfs_put_super, .statfs = hpfs_statfs, .remount_fs = hpfs_remount_fs, -- cgit v1.2.3-70-g09d2 From 62aff86fdf18657d9eca7878654415f94f16d027 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 00:28:54 -0400 Subject: switch jfs to ->evict_inode() Signed-off-by: Al Viro --- fs/jfs/inode.c | 35 ++++++++++++++++++----------------- fs/jfs/jfs_inode.h | 2 +- fs/jfs/super.c | 8 +------- 3 files changed, 20 insertions(+), 25 deletions(-) diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index c38dc180628..9978803ceed 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -145,31 +145,32 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) return 0; } -void jfs_delete_inode(struct inode *inode) +void jfs_evict_inode(struct inode *inode) { - jfs_info("In jfs_delete_inode, inode = 0x%p", inode); + jfs_info("In jfs_evict_inode, inode = 0x%p", inode); - if (!is_bad_inode(inode)) + if (!inode->i_nlink && !is_bad_inode(inode)) { dquot_initialize(inode); - if (!is_bad_inode(inode) && - (JFS_IP(inode)->fileset == FILESYSTEM_I)) { - truncate_inode_pages(&inode->i_data, 0); + if (JFS_IP(inode)->fileset == FILESYSTEM_I) { + truncate_inode_pages(&inode->i_data, 0); - if (test_cflag(COMMIT_Freewmap, inode)) - jfs_free_zero_link(inode); + if (test_cflag(COMMIT_Freewmap, inode)) + jfs_free_zero_link(inode); - diFree(inode); + diFree(inode); - /* - * Free the inode from the quota allocation. - */ - dquot_initialize(inode); - dquot_free_inode(inode); - dquot_drop(inode); + /* + * Free the inode from the quota allocation. 
+ */ + dquot_initialize(inode); + dquot_free_inode(inode); + } + } else { + truncate_inode_pages(&inode->i_data, 0); } - - clear_inode(inode); + end_writeback(inode); + dquot_drop(inode); } void jfs_dirty_inode(struct inode *inode) diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 11042b1f44b..155e91eff07 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -27,7 +27,7 @@ extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); extern struct inode *jfs_iget(struct super_block *, unsigned long); extern int jfs_commit_inode(struct inode *, int); extern int jfs_write_inode(struct inode *, struct writeback_control *); -extern void jfs_delete_inode(struct inode *); +extern void jfs_evict_inode(struct inode *); extern void jfs_dirty_inode(struct inode *); extern void jfs_truncate(struct inode *); extern void jfs_truncate_nolock(struct inode *, loff_t); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index b38f96bef82..ec8c3e4baca 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -132,11 +132,6 @@ static void jfs_destroy_inode(struct inode *inode) kmem_cache_free(jfs_inode_cachep, ji); } -static void jfs_clear_inode(struct inode *inode) -{ - dquot_drop(inode); -} - static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb); @@ -765,8 +760,7 @@ static const struct super_operations jfs_super_operations = { .destroy_inode = jfs_destroy_inode, .dirty_inode = jfs_dirty_inode, .write_inode = jfs_write_inode, - .delete_inode = jfs_delete_inode, - .clear_inode = jfs_clear_inode, + .evict_inode = jfs_evict_inode, .put_super = jfs_put_super, .sync_fs = jfs_sync_fs, .freeze_fs = jfs_freeze, -- cgit v1.2.3-70-g09d2 From d640e1b50885b5beb61ccacdebf9f3f05ee2119c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 00:34:05 -0400 Subject: switch ubifs to ->evict_inode() Signed-off-by: Al Viro --- fs/ubifs/super.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git 
a/fs/ubifs/super.c b/fs/ubifs/super.c index 4d2f2157dd3..899066dd0c1 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -327,7 +327,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc) return err; } -static void ubifs_delete_inode(struct inode *inode) +static void ubifs_evict_inode(struct inode *inode) { int err; struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -343,9 +343,12 @@ static void ubifs_delete_inode(struct inode *inode) dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); ubifs_assert(!atomic_read(&inode->i_count)); - ubifs_assert(inode->i_nlink == 0); truncate_inode_pages(&inode->i_data, 0); + + if (inode->i_nlink) + goto done; + if (is_bad_inode(inode)) goto out; @@ -367,7 +370,8 @@ out: c->nospace = c->nospace_rp = 0; smp_wmb(); } - clear_inode(inode); +done: + end_writeback(inode); } static void ubifs_dirty_inode(struct inode *inode) @@ -1824,7 +1828,7 @@ const struct super_operations ubifs_super_operations = { .destroy_inode = ubifs_destroy_inode, .put_super = ubifs_put_super, .write_inode = ubifs_write_inode, - .delete_inode = ubifs_delete_inode, + .evict_inode = ubifs_evict_inode, .statfs = ubifs_statfs, .dirty_inode = ubifs_dirty_inode, .remount_fs = ubifs_remount_fs, -- cgit v1.2.3-70-g09d2 From 3aac2b62e0f345c8a637cf94dc62e9000de9d8b6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 00:43:39 -0400 Subject: switch udf to ->evict_inode() Signed-off-by: Al Viro --- fs/udf/ialloc.c | 2 -- fs/udf/inode.c | 48 +++++++++++++++++++----------------------------- fs/udf/super.c | 3 +-- fs/udf/udfdecl.h | 3 +-- 4 files changed, 21 insertions(+), 35 deletions(-) diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 18cd7111185..75d9304d0dc 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -31,8 +31,6 @@ void udf_free_inode(struct inode *inode) struct super_block *sb = inode->i_sb; struct udf_sb_info *sbi = UDF_SB(sb); - clear_inode(inode); - mutex_lock(&sbi->s_alloc_mutex); if 
(sbi->s_lvid_bh) { struct logicalVolIntegrityDescImpUse *lvidiu = diff --git a/fs/udf/inode.c b/fs/udf/inode.c index ecddcc2ed74..fc48f37aa2d 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -68,37 +68,23 @@ static void udf_update_extents(struct inode *, static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); -void udf_delete_inode(struct inode *inode) -{ - truncate_inode_pages(&inode->i_data, 0); - - if (is_bad_inode(inode)) - goto no_delete; - - inode->i_size = 0; - udf_truncate(inode); - lock_kernel(); - - udf_update_inode(inode, IS_SYNC(inode)); - udf_free_inode(inode); - - unlock_kernel(); - return; - -no_delete: - clear_inode(inode); -} - -/* - * If we are going to release inode from memory, we truncate last inode extent - * to proper length. We could use drop_inode() but it's called under inode_lock - * and thus we cannot mark inode dirty there. We use clear_inode() but we have - * to make sure to write inode as it's not written automatically. - */ -void udf_clear_inode(struct inode *inode) +void udf_evict_inode(struct inode *inode) { struct udf_inode_info *iinfo = UDF_I(inode); + int want_delete = 0; + truncate_inode_pages(&inode->i_data, 0); + + if (!inode->i_nlink && !is_bad_inode(inode)) { + want_delete = 1; + inode->i_size = 0; + udf_truncate(inode); + lock_kernel(); + udf_update_inode(inode, IS_SYNC(inode)); + unlock_kernel(); + } + invalidate_inode_buffers(inode); + end_writeback(inode); if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && inode->i_size != iinfo->i_lenExtents) { printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has " @@ -108,9 +94,13 @@ void udf_clear_inode(struct inode *inode) (unsigned long long)inode->i_size, (unsigned long long)iinfo->i_lenExtents); } - kfree(iinfo->i_ext.i_data); iinfo->i_ext.i_data = NULL; + if (want_delete) { + lock_kernel(); + udf_free_inode(inode); + unlock_kernel(); + } } static int udf_writepage(struct page *page, struct writeback_control *wbc) diff --git a/fs/udf/super.c 
b/fs/udf/super.c index 612d1e2e285..f9f4a9a0ea2 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -175,8 +175,7 @@ static const struct super_operations udf_sb_ops = { .alloc_inode = udf_alloc_inode, .destroy_inode = udf_destroy_inode, .write_inode = udf_write_inode, - .delete_inode = udf_delete_inode, - .clear_inode = udf_clear_inode, + .evict_inode = udf_evict_inode, .put_super = udf_put_super, .sync_fs = udf_sync_fs, .statfs = udf_statfs, diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 2bac0354891..6995ab1f430 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -139,8 +139,7 @@ extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); extern struct buffer_head *udf_bread(struct inode *, int, int, int *); extern void udf_truncate(struct inode *); extern void udf_read_inode(struct inode *); -extern void udf_delete_inode(struct inode *); -extern void udf_clear_inode(struct inode *); +extern void udf_evict_inode(struct inode *); extern int udf_write_inode(struct inode *, struct writeback_control *wbc); extern long udf_block_map(struct inode *, sector_t); extern int udf_extend_file(struct inode *, struct extent_position *, -- cgit v1.2.3-70-g09d2 From 94ee8494ac84606f06d522a2c016d40aabffb378 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 00:45:56 -0400 Subject: switch ncpfs to ->evict_inode() Signed-off-by: Al Viro --- fs/ncpfs/inode.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index b4e8aaae14b..c0434313f92 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -43,7 +43,7 @@ #define NCP_DEFAULT_TIME_OUT 10 #define NCP_DEFAULT_RETRY_COUNT 20 -static void ncp_delete_inode(struct inode *); +static void ncp_evict_inode(struct inode *); static void ncp_put_super(struct super_block *); static int ncp_statfs(struct dentry *, struct kstatfs *); static int ncp_show_options(struct seq_file *, struct vfsmount *); @@ -100,7 +100,7 @@ static const struct 
super_operations ncp_sops = .alloc_inode = ncp_alloc_inode, .destroy_inode = ncp_destroy_inode, .drop_inode = generic_delete_inode, - .delete_inode = ncp_delete_inode, + .evict_inode = ncp_evict_inode, .put_super = ncp_put_super, .statfs = ncp_statfs, .remount_fs = ncp_remount, @@ -282,19 +282,19 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info) } static void -ncp_delete_inode(struct inode *inode) +ncp_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); if (S_ISDIR(inode->i_mode)) { - DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino); + DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino); } if (ncp_make_closed(inode) != 0) { /* We can't do anything but complain. */ - printk(KERN_ERR "ncp_delete_inode: could not close\n"); + printk(KERN_ERR "ncp_evict_inode: could not close\n"); } - clear_inode(inode); } static void ncp_stop_tasks(struct ncp_server *server) { -- cgit v1.2.3-70-g09d2 From 066d92dcbfa5842d98f6c4c671220cef50a9720f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Jun 2010 21:28:10 -0400 Subject: convert ocfs2 to ->evict_inode() Signed-off-by: Al Viro --- fs/ocfs2/inode.c | 21 ++++++++++++++------- fs/ocfs2/inode.h | 3 +-- fs/ocfs2/super.c | 3 +-- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index abb0a95cc71..eb7fd07c90f 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -969,7 +969,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode, truncate_inode_pages(&inode->i_data, 0); } -void ocfs2_delete_inode(struct inode *inode) +static void ocfs2_delete_inode(struct inode *inode) { int wipe, status; sigset_t oldset; @@ -1075,20 +1075,17 @@ bail_unlock_nfs_sync: bail_unblock: ocfs2_unblock_signals(&oldset); bail: - clear_inode(inode); mlog_exit_void(); } -void ocfs2_clear_inode(struct inode *inode) +static void ocfs2_clear_inode(struct inode *inode) { int status; struct ocfs2_inode_info *oi = 
OCFS2_I(inode); mlog_entry_void(); - if (!inode) - goto bail; - + end_writeback(inode); mlog(0, "Clearing inode: %llu, nlink = %u\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink); @@ -1180,10 +1177,20 @@ void ocfs2_clear_inode(struct inode *inode) jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, &oi->ip_jinode); -bail: mlog_exit_void(); } +void ocfs2_evict_inode(struct inode *inode) +{ + if (!inode->i_nlink || + (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) { + ocfs2_delete_inode(inode); + } else { + truncate_inode_pages(&inode->i_data, 0); + } + ocfs2_clear_inode(inode); +} + /* Called under inode_lock, with no more references on the * struct inode, so it's safe here to check the flags field * and to manipulate i_nlink without any other locks. */ diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 9f5f5fcadc4..975eedd7b24 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -123,8 +123,7 @@ static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode) return &OCFS2_I(inode)->ip_metadata_cache; } -void ocfs2_clear_inode(struct inode *inode); -void ocfs2_delete_inode(struct inode *inode); +void ocfs2_evict_inode(struct inode *inode); void ocfs2_drop_inode(struct inode *inode); /* Flags for ocfs2_iget() */ diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 0eaa929a4db..ae1a4437a98 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -145,8 +145,7 @@ static const struct super_operations ocfs2_sops = { .alloc_inode = ocfs2_alloc_inode, .destroy_inode = ocfs2_destroy_inode, .drop_inode = ocfs2_drop_inode, - .clear_inode = ocfs2_clear_inode, - .delete_inode = ocfs2_delete_inode, + .evict_inode = ocfs2_evict_inode, .sync_fs = ocfs2_sync_fs, .put_super = ocfs2_put_super, .remount_fs = ocfs2_remount, -- cgit v1.2.3-70-g09d2 From d5c1515cf374951f07e5bf97b6ff3718d3401b6f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 11:05:19 -0400 Subject: switch gfs2 to ->evict_inode() Signed-off-by: 
Al Viro --- fs/gfs2/super.c | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 4d1aad38f1b..555f5a417c6 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1203,25 +1203,6 @@ static void gfs2_drop_inode(struct inode *inode) generic_drop_inode(inode); } -/** - * gfs2_clear_inode - Deallocate an inode when VFS is done with it - * @inode: The VFS inode - * - */ - -static void gfs2_clear_inode(struct inode *inode) -{ - struct gfs2_inode *ip = GFS2_I(inode); - - ip->i_gl->gl_object = NULL; - gfs2_glock_put(ip->i_gl); - ip->i_gl = NULL; - if (ip->i_iopen_gh.gh_gl) { - ip->i_iopen_gh.gh_gl->gl_object = NULL; - gfs2_glock_dq_uninit(&ip->i_iopen_gh); - } -} - static int is_ancestor(const struct dentry *d1, const struct dentry *d2) { do { @@ -1347,13 +1328,16 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) * is safe, just less efficient. */ -static void gfs2_delete_inode(struct inode *inode) +static void gfs2_evict_inode(struct inode *inode) { struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int error; + if (inode->i_nlink) + goto out; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (unlikely(error)) { gfs2_glock_dq_uninit(&ip->i_iopen_gh); @@ -1407,10 +1391,18 @@ out_unlock: gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_dq_uninit(&gh); if (error && error != GLR_TRYFAILED && error != -EROFS) - fs_warn(sdp, "gfs2_delete_inode: %d\n", error); + fs_warn(sdp, "gfs2_evict_inode: %d\n", error); out: truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); + end_writeback(inode); + + ip->i_gl->gl_object = NULL; + gfs2_glock_put(ip->i_gl); + ip->i_gl = NULL; + if (ip->i_iopen_gh.gh_gl) { + ip->i_iopen_gh.gh_gl->gl_object = NULL; + gfs2_glock_dq_uninit(&ip->i_iopen_gh); + } } static struct inode *gfs2_alloc_inode(struct super_block *sb) @@ -1434,14 +1426,13 @@ const struct 
super_operations gfs2_super_ops = { .alloc_inode = gfs2_alloc_inode, .destroy_inode = gfs2_destroy_inode, .write_inode = gfs2_write_inode, - .delete_inode = gfs2_delete_inode, + .evict_inode = gfs2_evict_inode, .put_super = gfs2_put_super, .sync_fs = gfs2_sync_fs, .freeze_fs = gfs2_freeze, .unfreeze_fs = gfs2_unfreeze, .statfs = gfs2_statfs, .remount_fs = gfs2_remount_fs, - .clear_inode = gfs2_clear_inode, .drop_inode = gfs2_drop_inode, .show_options = gfs2_show_options, }; -- cgit v1.2.3-70-g09d2 From bd55597520a2eaa0d71dd7683513a14bfd1bdf5c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 11:35:40 -0400 Subject: convert btrfs to ->evict_inode() NB: do we want btrfs_wait_ordered_range() on eviction of inodes with positive i_nlink on subvolume with zero root_refs? If not, btrfs_evict_inode() can be simplified by unconditionally bailing out in case of i_nlink > 0 in the very beginning... Signed-off-by: Al Viro --- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 8 ++++++-- fs/btrfs/super.c | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 29c20092847..394d5422ab6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2389,7 +2389,7 @@ unsigned long btrfs_force_ra(struct address_space *mapping, pgoff_t offset, pgoff_t last_index); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); -void btrfs_delete_inode(struct inode *inode); +void btrfs_evict_inode(struct inode *inode); void btrfs_put_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); void btrfs_dirty_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 95eac011696..ce02199ec4e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3668,7 +3668,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) return err; } -void btrfs_delete_inode(struct inode *inode) +void 
btrfs_evict_inode(struct inode *inode) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -3676,10 +3676,14 @@ void btrfs_delete_inode(struct inode *inode) int ret; truncate_inode_pages(&inode->i_data, 0); + if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0) + goto no_delete; + if (is_bad_inode(inode)) { btrfs_orphan_del(NULL, inode); goto no_delete; } + /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ btrfs_wait_ordered_range(inode, 0, (u64)-1); if (root->fs_info->log_root_recovering) { @@ -3729,7 +3733,7 @@ void btrfs_delete_inode(struct inode *inode) btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); no_delete: - clear_inode(inode); + end_writeback(inode); return; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f2393b39031..1776dbd8dc9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -797,7 +797,7 @@ static int btrfs_unfreeze(struct super_block *sb) static const struct super_operations btrfs_super_ops = { .drop_inode = btrfs_drop_inode, - .delete_inode = btrfs_delete_inode, + .evict_inode = btrfs_evict_inode, .put_super = btrfs_put_super, .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, -- cgit v1.2.3-70-g09d2 From 845a2cc0507055278e0fa722ed0f8c791b7401dd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 11:37:37 -0400 Subject: convert reiserfs to ->evict_inode() Signed-off-by: Al Viro --- fs/reiserfs/inode.c | 13 ++++++++++--- fs/reiserfs/super.c | 8 +------- include/linux/reiserfs_fs.h | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 46ba1cfc2df..a94e08b339f 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -25,7 +25,7 @@ int reiserfs_commit_write(struct file *f, struct page *page, int reiserfs_prepare_write(struct file *f, struct page *page, unsigned from, unsigned to); -void reiserfs_delete_inode(struct inode *inode) +void 
reiserfs_evict_inode(struct inode *inode) { /* We need blocks for transaction + (user+group) quota update (possibly delete) */ int jbegin_count = @@ -35,10 +35,12 @@ void reiserfs_delete_inode(struct inode *inode) int depth; int err; - if (!is_bad_inode(inode)) + if (!inode->i_nlink && !is_bad_inode(inode)) dquot_initialize(inode); truncate_inode_pages(&inode->i_data, 0); + if (inode->i_nlink) + goto no_delete; depth = reiserfs_write_lock_once(inode->i_sb); @@ -77,9 +79,14 @@ void reiserfs_delete_inode(struct inode *inode) ; } out: - clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ + end_writeback(inode); /* note this must go after the journal_end to prevent deadlock */ + dquot_drop(inode); inode->i_blocks = 0; reiserfs_write_unlock_once(inode->i_sb, depth); + +no_delete: + end_writeback(inode); + dquot_drop(inode); } static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 1e1ee9056eb..e15ff612002 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -591,11 +591,6 @@ out: reiserfs_write_unlock_once(inode->i_sb, lock_depth); } -static void reiserfs_clear_inode(struct inode *inode) -{ - dquot_drop(inode); -} - #ifdef CONFIG_QUOTA static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, size_t, loff_t); @@ -608,8 +603,7 @@ static const struct super_operations reiserfs_sops = { .destroy_inode = reiserfs_destroy_inode, .write_inode = reiserfs_write_inode, .dirty_inode = reiserfs_dirty_inode, - .clear_inode = reiserfs_clear_inode, - .delete_inode = reiserfs_delete_inode, + .evict_inode = reiserfs_evict_inode, .put_super = reiserfs_put_super, .write_super = reiserfs_write_super, .sync_fs = reiserfs_sync_fs, diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 3b603f47418..2a464ae147c 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2033,7 +2033,7 @@ void 
reiserfs_read_locked_inode(struct inode *inode, struct reiserfs_iget_args *args); int reiserfs_find_actor(struct inode *inode, void *p); int reiserfs_init_locked_inode(struct inode *inode, void *p); -void reiserfs_delete_inode(struct inode *inode); +void reiserfs_evict_inode(struct inode *inode); int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc); int reiserfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create); -- cgit v1.2.3-70-g09d2 From 4ec70c9b46b032e7f1b41b543c607d6a33b78a1a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 11:42:26 -0400 Subject: convert exofs to ->evict_inode() Signed-off-by: Al Viro --- fs/exofs/exofs.h | 2 +- fs/exofs/inode.c | 8 ++++---- fs/exofs/super.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 0706ce996c8..2dc925fa101 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -263,7 +263,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, extern struct inode *exofs_iget(struct super_block *, unsigned long); struct inode *exofs_new_inode(struct inode *, int); extern int exofs_write_inode(struct inode *, struct writeback_control *wbc); -extern void exofs_delete_inode(struct inode *); +extern void exofs_evict_inode(struct inode *); /* dir.c: */ int exofs_add_link(struct dentry *, struct inode *); diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index ccd0ce3eea7..088cb476b68 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1307,7 +1307,7 @@ static void delete_done(struct exofs_io_state *ios, void *p) * from the OSD here. We make sure the object was created before we try and * delete it. 
*/ -void exofs_delete_inode(struct inode *inode) +void exofs_evict_inode(struct inode *inode) { struct exofs_i_info *oi = exofs_i(inode); struct super_block *sb = inode->i_sb; @@ -1318,11 +1318,11 @@ void exofs_delete_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); /* TODO: should do better here */ - if (is_bad_inode(inode)) + if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; inode->i_size = 0; - clear_inode(inode); + end_writeback(inode); /* if we are deleting an obj that hasn't been created yet, wait */ if (!obj_created(oi)) { @@ -1353,5 +1353,5 @@ void exofs_delete_inode(struct inode *inode) return; no_delete: - clear_inode(inode); + end_writeback(inode); } diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 03149b9a517..32cfd61def5 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -743,7 +743,7 @@ static const struct super_operations exofs_sops = { .alloc_inode = exofs_alloc_inode, .destroy_inode = exofs_destroy_inode, .write_inode = exofs_write_inode, - .delete_inode = exofs_delete_inode, + .evict_inode = exofs_evict_inode, .put_super = exofs_put_super, .write_super = exofs_write_super, .sync_fs = exofs_sync_fs, -- cgit v1.2.3-70-g09d2 From 6fd1e5c994c392ebdbe45600051b2a32ec4860f1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 11:55:00 -0400 Subject: convert nilfs2 to ->evict_inode() [folded build fix from sfr] Signed-off-by: Al Viro --- fs/nilfs2/inode.c | 28 ++++++++++++++++++++++++---- fs/nilfs2/nilfs.h | 2 +- fs/nilfs2/super.c | 20 +------------------- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 051d279abb3..eccb2f2e231 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -27,6 +27,7 @@ #include #include #include "nilfs.h" +#include "btnode.h" #include "segment.h" #include "page.h" #include "mdt.h" @@ -354,7 +355,6 @@ void nilfs_free_inode(struct inode *inode) struct super_block *sb = inode->i_sb; struct nilfs_sb_info *sbi = 
NILFS_SB(sb); - clear_inode(inode); /* XXX: check error code? Is there any thing I can do? */ (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino); atomic_dec(&sbi->s_inodes_count); @@ -614,16 +614,34 @@ void nilfs_truncate(struct inode *inode) But truncate has no return value. */ } -void nilfs_delete_inode(struct inode *inode) +static void nilfs_clear_inode(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + + /* + * Free resources allocated in nilfs_read_inode(), here. + */ + BUG_ON(!list_empty(&ii->i_dirty)); + brelse(ii->i_bh); + ii->i_bh = NULL; + + if (test_bit(NILFS_I_BMAP, &ii->i_state)) + nilfs_bmap_clear(ii->i_bmap); + + nilfs_btnode_cache_clear(&ii->i_btnode_cache); +} + +void nilfs_evict_inode(struct inode *inode) { struct nilfs_transaction_info ti; struct super_block *sb = inode->i_sb; struct nilfs_inode_info *ii = NILFS_I(inode); - if (unlikely(is_bad_inode(inode))) { + if (inode->i_nlink || unlikely(is_bad_inode(inode))) { if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); + end_writeback(inode); + nilfs_clear_inode(inode); return; } nilfs_transaction_begin(sb, &ti, 0); /* never fails */ @@ -633,6 +651,8 @@ void nilfs_delete_inode(struct inode *inode) nilfs_truncate_bmap(ii, 0); nilfs_mark_inode_dirty(inode); + end_writeback(inode); + nilfs_clear_inode(inode); nilfs_free_inode(inode); /* nilfs_free_inode() marks inode buffer dirty */ if (IS_SYNC(inode)) diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 47d6d792812..f0327974809 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -245,7 +245,7 @@ extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int); extern struct inode *nilfs_iget(struct super_block *, unsigned long); extern void nilfs_update_inode(struct inode *, struct buffer_head *); extern void nilfs_truncate(struct inode *); -extern void nilfs_delete_inode(struct inode *); +extern void nilfs_evict_inode(struct inode *); extern int 
nilfs_setattr(struct dentry *, struct iattr *); extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, struct buffer_head **); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 414ef68931c..7c7572a4e13 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -159,23 +159,6 @@ void nilfs_destroy_inode(struct inode *inode) kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); } -static void nilfs_clear_inode(struct inode *inode) -{ - struct nilfs_inode_info *ii = NILFS_I(inode); - - /* - * Free resources allocated in nilfs_read_inode(), here. - */ - BUG_ON(!list_empty(&ii->i_dirty)); - brelse(ii->i_bh); - ii->i_bh = NULL; - - if (test_bit(NILFS_I_BMAP, &ii->i_state)) - nilfs_bmap_clear(ii->i_bmap); - - nilfs_btnode_cache_clear(&ii->i_btnode_cache); -} - static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb) { struct the_nilfs *nilfs = sbi->s_nilfs; @@ -467,7 +450,7 @@ static const struct super_operations nilfs_sops = { /* .write_inode = nilfs_write_inode, */ /* .put_inode = nilfs_put_inode, */ /* .drop_inode = nilfs_drop_inode, */ - .delete_inode = nilfs_delete_inode, + .evict_inode = nilfs_evict_inode, .put_super = nilfs_put_super, /* .write_super = nilfs_write_super, */ .sync_fs = nilfs_sync_fs, @@ -475,7 +458,6 @@ static const struct super_operations nilfs_sops = { /* .unlockfs */ .statfs = nilfs_statfs, .remount_fs = nilfs_remount, - .clear_inode = nilfs_clear_inode, /* .umount_begin */ .show_options = nilfs_show_options }; -- cgit v1.2.3-70-g09d2 From 8e22c1a4e429e9facf309c7e7a03ba9cdfd7b106 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 12:22:31 -0400 Subject: logfs: get rid of magical inodes ordering problems at ->kill_sb() time are solved by doing iput() of these suckers in ->put_super() Signed-off-by: Al Viro --- fs/logfs/inode.c | 44 ++++++++++++++++---------------------------- fs/logfs/journal.c | 2 -- fs/logfs/logfs.h | 1 - fs/logfs/readwrite.c | 1 - fs/logfs/segment.c | 1 - fs/logfs/super.c | 23 
+++++++++++++++-------- 6 files changed, 31 insertions(+), 41 deletions(-) diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index f602e230e16..7811a2a3593 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -235,33 +235,21 @@ static struct inode *logfs_alloc_inode(struct super_block *sb) * purpose is to create a new inode that will not trigger the warning if such * an inode is still in use. An ugly hack, no doubt. Suggections for * improvement are welcome. + * + * AV: that's what ->put_super() is for... */ struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino) { struct inode *inode; - inode = logfs_alloc_inode(sb); + inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); inode->i_mode = S_IFREG; inode->i_ino = ino; - inode->i_sb = sb; - - /* This is a blatant copy of alloc_inode code. We'd need alloc_inode - * to be nonstatic, alas. */ - { - struct address_space * const mapping = &inode->i_data; - - mapping->a_ops = &logfs_reg_aops; - mapping->host = inode; - mapping->flags = 0; - mapping_set_gfp_mask(mapping, GFP_NOFS); - mapping->assoc_mapping = NULL; - mapping->backing_dev_info = &default_backing_dev_info; - inode->i_mapping = mapping; - inode->i_nlink = 1; - } + inode->i_data.a_ops = &logfs_reg_aops; + mapping_set_gfp_mask(&inode->i_data, GFP_NOFS); return inode; } @@ -277,7 +265,7 @@ struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino) err = logfs_read_inode(inode); if (err) { - destroy_meta_inode(inode); + iput(inode); return ERR_PTR(err); } logfs_inode_setops(inode); @@ -298,16 +286,6 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc) return ret; } -void destroy_meta_inode(struct inode *inode) -{ - if (inode) { - if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); - logfs_clear_inode(inode); - kmem_cache_free(logfs_inode_cache, logfs_inode(inode)); - } -} - /* called with inode_lock held */ static void logfs_drop_inode(struct inode *inode) { @@ -384,12 +362,22 @@ static 
int logfs_sync_fs(struct super_block *sb, int wait) return 0; } +static void logfs_put_super(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + /* kill the meta-inodes */ + iput(super->s_master_inode); + iput(super->s_segfile_inode); + iput(super->s_mapping_inode); +} + const struct super_operations logfs_super_operations = { .alloc_inode = logfs_alloc_inode, .clear_inode = logfs_clear_inode, .delete_inode = logfs_delete_inode, .destroy_inode = logfs_destroy_inode, .drop_inode = logfs_drop_inode, + .put_super = logfs_put_super, .write_inode = logfs_write_inode, .statfs = logfs_statfs, .sync_fs = logfs_sync_fs, diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 4b0e0616b35..f46ee8b0e13 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c @@ -889,8 +889,6 @@ void logfs_cleanup_journal(struct super_block *sb) struct logfs_super *super = logfs_super(sb); btree_grim_visitor32(&super->s_reserved_segments, 0, NULL); - destroy_meta_inode(super->s_master_inode); - super->s_master_inode = NULL; kfree(super->s_compressed_je); kfree(super->s_je); diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index c838c4d7211..5e65171dad2 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -525,7 +525,6 @@ struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino); struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino); int logfs_init_inode_cache(void); void logfs_destroy_inode_cache(void); -void destroy_meta_inode(struct inode *inode); void logfs_set_blocks(struct inode *inode, u64 no); /* these logically belong into inode.c but actually reside in readwrite.c */ int logfs_read_inode(struct inode *inode); diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 0718d112a1a..580d126d597 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -2272,7 +2272,6 @@ void logfs_cleanup_rw(struct super_block *sb) { struct logfs_super *super = logfs_super(sb); - destroy_meta_inode(super->s_segfile_inode); 
logfs_mempool_destroy(super->s_block_pool); logfs_mempool_destroy(super->s_shadow_pool); } diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index a9657afb70a..9d518735325 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -929,5 +929,4 @@ void logfs_cleanup_areas(struct super_block *sb) for_each_area(i) free_area(super->s_area[i]); free_area(super->s_journal_area); - destroy_meta_inode(super->s_mapping_inode); } diff --git a/fs/logfs/super.c b/fs/logfs/super.c index d651e10a1e9..5336155c5d8 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -342,24 +342,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt) goto fail; } + /* at that point we know that ->put_super() will be called */ super->s_erase_page = alloc_pages(GFP_KERNEL, 0); if (!super->s_erase_page) - goto fail; + return -ENOMEM; memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE); /* FIXME: check for read-only mounts */ err = logfs_make_writeable(sb); - if (err) - goto fail1; + if (err) { + __free_page(super->s_erase_page); + return err; + } log_super("LogFS: Finished mounting\n"); simple_set_mnt(mnt, sb); return 0; -fail1: - __free_page(super->s_erase_page); fail: - iput(logfs_super(sb)->s_master_inode); + iput(super->s_master_inode); + iput(super->s_segfile_inode); + iput(super->s_mapping_inode); return -EIO; } @@ -580,10 +583,14 @@ int logfs_get_sb_device(struct file_system_type *type, int flags, sb->s_flags |= MS_ACTIVE; err = logfs_get_sb_final(sb, mnt); if (err) - goto err1; - return 0; + deactivate_locked_super(sb); + return err; err1: + /* no ->s_root, no ->put_super() */ + iput(super->s_master_inode); + iput(super->s_segfile_inode); + iput(super->s_mapping_inode); deactivate_locked_super(sb); return err; err0: -- cgit v1.2.3-70-g09d2 From 7da08fd17a6e42d80f0f3897a5cbd682e77bcdb4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 13:11:34 -0400 Subject: convert logfs to ->evict_inode() Signed-off-by: Al Viro --- fs/logfs/inode.c | 3 +-- 
fs/logfs/logfs.h | 3 +-- fs/logfs/readwrite.c | 61 +++++++++++++++++++++++++--------------------------- 3 files changed, 31 insertions(+), 36 deletions(-) diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 7811a2a3593..78be674d95c 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -373,9 +373,8 @@ static void logfs_put_super(struct super_block *sb) const struct super_operations logfs_super_operations = { .alloc_inode = logfs_alloc_inode, - .clear_inode = logfs_clear_inode, - .delete_inode = logfs_delete_inode, .destroy_inode = logfs_destroy_inode, + .evict_inode = logfs_evict_inode, .drop_inode = logfs_drop_inode, .put_super = logfs_put_super, .write_inode = logfs_write_inode, diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 5e65171dad2..5e3b7207795 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -529,8 +529,7 @@ void logfs_set_blocks(struct inode *inode, u64 no); /* these logically belong into inode.c but actually reside in readwrite.c */ int logfs_read_inode(struct inode *inode); int __logfs_write_inode(struct inode *inode, long flags); -void logfs_delete_inode(struct inode *inode); -void logfs_clear_inode(struct inode *inode); +void logfs_evict_inode(struct inode *inode); /* journal.c */ void logfs_write_anchor(struct super_block *sb); diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 580d126d597..6127baf0e18 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -1972,31 +1972,6 @@ static struct page *inode_to_page(struct inode *inode) return page; } -/* Cheaper version of write_inode. All changes are concealed in - * aliases, which are moved back. No write to the medium happens. 
- */ -void logfs_clear_inode(struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - struct logfs_inode *li = logfs_inode(inode); - struct logfs_block *block = li->li_block; - struct page *page; - - /* Only deleted files may be dirty at this point */ - BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink); - if (!block) - return; - if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) { - block->ops->free_block(inode->i_sb, block); - return; - } - - BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS); - page = inode_to_page(inode); - BUG_ON(!page); /* FIXME: Use emergency page */ - logfs_put_write_page(page); -} - static int do_write_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; @@ -2164,18 +2139,40 @@ static int do_delete_inode(struct inode *inode) * ZOMBIE inodes have already been deleted before and should remain dead, * if it weren't for valid checking. No need to kill them again here. */ -void logfs_delete_inode(struct inode *inode) +void logfs_evict_inode(struct inode *inode) { + struct super_block *sb = inode->i_sb; struct logfs_inode *li = logfs_inode(inode); + struct logfs_block *block = li->li_block; + struct page *page; - if (!(li->li_flags & LOGFS_IF_ZOMBIE)) { - li->li_flags |= LOGFS_IF_ZOMBIE; - if (i_size_read(inode) > 0) - logfs_truncate(inode, 0); - do_delete_inode(inode); + if (!inode->i_nlink) { + if (!(li->li_flags & LOGFS_IF_ZOMBIE)) { + li->li_flags |= LOGFS_IF_ZOMBIE; + if (i_size_read(inode) > 0) + logfs_truncate(inode, 0); + do_delete_inode(inode); + } } truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); + end_writeback(inode); + + /* Cheaper version of write_inode. All changes are concealed in + * aliases, which are moved back. No write to the medium happens. 
+ */ + /* Only deleted files may be dirty at this point */ + BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink); + if (!block) + return; + if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) { + block->ops->free_block(inode->i_sb, block); + return; + } + + BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS); + page = inode_to_page(inode); + BUG_ON(!page); /* FIXME: Use emergency page */ + logfs_put_write_page(page); } void btree_write_block(struct logfs_block *block) -- cgit v1.2.3-70-g09d2 From 0930fcc1ee2f0a810b938bc283a3a262d7adccbb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 13:16:22 -0400 Subject: convert ext4 to ->evict_inode() pretty much brute-force... Signed-off-by: Al Viro --- fs/ext4/ext4.h | 3 ++- fs/ext4/ialloc.c | 2 +- fs/ext4/inode.c | 11 ++++++++--- fs/ext4/super.c | 10 +++++----- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 19a4de57128..6a0d52ca143 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1571,7 +1571,8 @@ extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct dentry *, struct iattr *); extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); -extern void ext4_delete_inode(struct inode *); +extern void ext4_evict_inode(struct inode *); +extern void ext4_clear_inode(struct inode *); extern int ext4_sync_inode(handle_t *, struct inode *); extern void ext4_dirty_inode(struct inode *); extern int ext4_change_inode_journal_flag(struct inode *, int); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 25c4b3173fd..07e791a856d 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -222,7 +222,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) is_directory = S_ISDIR(inode->i_mode); /* Do this BEFORE marking the inode not in use or returning an error */ - clear_inode(inode); + ext4_clear_inode(inode); es = EXT4_SB(sb)->s_es; if (ino < EXT4_FIRST_INO(sb) || ino > 
le32_to_cpu(es->s_inodes_count)) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1fb390359bc..c6d365f9c66 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -167,11 +167,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, /* * Called at the last iput() if i_nlink is zero. */ -void ext4_delete_inode(struct inode *inode) +void ext4_evict_inode(struct inode *inode) { handle_t *handle; int err; + if (inode->i_nlink) { + truncate_inode_pages(&inode->i_data, 0); + goto no_delete; + } + if (!is_bad_inode(inode)) dquot_initialize(inode); @@ -245,13 +250,13 @@ void ext4_delete_inode(struct inode *inode) */ if (ext4_mark_inode_dirty(handle, inode)) /* If that failed, just do the required in-core inode clear. */ - clear_inode(inode); + ext4_clear_inode(inode); else ext4_free_inode(handle, inode); ext4_journal_stop(handle); return; no_delete: - clear_inode(inode); /* We must guarantee clearing of inode... */ + ext4_clear_inode(inode); /* We must guarantee clearing of inode... 
*/ } typedef struct { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4e8983a9811..f627a6a4c31 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -813,8 +813,10 @@ static void destroy_inodecache(void) kmem_cache_destroy(ext4_inode_cachep); } -static void ext4_clear_inode(struct inode *inode) +void ext4_clear_inode(struct inode *inode) { + invalidate_inode_buffers(inode); + end_writeback(inode); dquot_drop(inode); ext4_discard_preallocations(inode); if (EXT4_JOURNAL(inode)) @@ -1100,14 +1102,13 @@ static const struct super_operations ext4_sops = { .destroy_inode = ext4_destroy_inode, .write_inode = ext4_write_inode, .dirty_inode = ext4_dirty_inode, - .delete_inode = ext4_delete_inode, + .evict_inode = ext4_evict_inode, .put_super = ext4_put_super, .sync_fs = ext4_sync_fs, .freeze_fs = ext4_freeze, .unfreeze_fs = ext4_unfreeze, .statfs = ext4_statfs, .remount_fs = ext4_remount, - .clear_inode = ext4_clear_inode, .show_options = ext4_show_options, #ifdef CONFIG_QUOTA .quota_read = ext4_quota_read, @@ -1121,12 +1122,11 @@ static const struct super_operations ext4_nojournal_sops = { .destroy_inode = ext4_destroy_inode, .write_inode = ext4_write_inode, .dirty_inode = ext4_dirty_inode, - .delete_inode = ext4_delete_inode, + .evict_inode = ext4_evict_inode, .write_super = ext4_write_super, .put_super = ext4_put_super, .statfs = ext4_statfs, .remount_fs = ext4_remount, - .clear_inode = ext4_clear_inode, .show_options = ext4_show_options, #ifdef CONFIG_QUOTA .quota_read = ext4_quota_read, -- cgit v1.2.3-70-g09d2 From 07958f9f5b9e8422c15368a1733a52ea99009896 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 13:20:09 -0400 Subject: ->delete_inode() is gone Signed-off-by: Al Viro --- fs/inode.c | 2 -- include/linux/fs.h | 1 - 2 files changed, 3 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 93e7a5ecbc2..7a1bea9cb8e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -335,8 +335,6 @@ static void evict(struct inode *inode, int delete) if (op->evict_inode) { 
op->evict_inode(inode); - } else if (delete && op->delete_inode) { - op->delete_inode(inode); } else { if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); diff --git a/include/linux/fs.h b/include/linux/fs.h index 3c23c1dcb1b..2b1254771e4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1564,7 +1564,6 @@ struct super_operations { int (*write_inode) (struct inode *, struct writeback_control *wbc); void (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); - void (*delete_inode) (struct inode *); void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); -- cgit v1.2.3-70-g09d2 From 644da5960ded137c339bc69bc2aeac54f73aad59 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 13:21:05 -0400 Subject: fs/inode.c:evict() doesn't care about delete vs. non-delete paths now Signed-off-by: Al Viro --- fs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 7a1bea9cb8e..8320bef7177 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -329,7 +329,7 @@ void clear_inode(struct inode *inode) } EXPORT_SYMBOL(clear_inode); -static void evict(struct inode *inode, int delete) +static void evict(struct inode *inode) { const struct super_operations *op = inode->i_sb->s_op; @@ -363,7 +363,7 @@ static void dispose_list(struct list_head *head) inode = list_first_entry(head, struct inode, i_list); list_del(&inode->i_list); - evict(inode, 0); + evict(inode); spin_lock(&inode_lock); hlist_del_init(&inode->i_hash); @@ -1224,7 +1224,7 @@ void generic_delete_inode(struct inode *inode) inodes_stat.nr_inodes--; spin_unlock(&inode_lock); - evict(inode, 1); + evict(inode); spin_lock(&inode_lock); hlist_del_init(&inode->i_hash); @@ -1279,7 +1279,7 @@ static void generic_forget_inode(struct inode *inode) { if (!generic_detach_inode(inode)) return; - evict(inode, 0); + evict(inode); wake_up_inode(inode); destroy_inode(inode); 
} -- cgit v1.2.3-70-g09d2 From 30140837f256558c943636245ab90897a9455a70 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 13:23:20 -0400 Subject: fs/inode.c:clear_inode() is gone Signed-off-by: Al Viro --- fs/inode.c | 28 ++++------------------------ include/linux/fs.h | 1 - 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 8320bef7177..82ca3562a68 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -306,29 +306,6 @@ void end_writeback(struct inode *inode) } EXPORT_SYMBOL(end_writeback); -/** - * clear_inode - clear an inode - * @inode: inode to clear - * - * This is called by the filesystem to tell us - * that the inode is no longer useful. We just - * terminate it with extreme prejudice. - */ -void clear_inode(struct inode *inode) -{ - might_sleep(); - invalidate_inode_buffers(inode); - - BUG_ON(inode->i_data.nrpages); - BUG_ON(!(inode->i_state & I_FREEING)); - BUG_ON(inode->i_state & I_CLEAR); - inode_sync_wait(inode); - if (inode->i_sb->s_op->clear_inode) - inode->i_sb->s_op->clear_inode(inode); - inode->i_state = I_FREEING | I_CLEAR; -} -EXPORT_SYMBOL(clear_inode); - static void evict(struct inode *inode) { const struct super_operations *op = inode->i_sb->s_op; @@ -338,7 +315,10 @@ static void evict(struct inode *inode) } else { if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); + invalidate_inode_buffers(inode); + end_writeback(inode); + if (op->clear_inode) + op->clear_inode(inode); } if (S_ISBLK(inode->i_mode) && inode->i_bdev) bd_forget(inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index 2b1254771e4..4eaa6b2e35d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2182,7 +2182,6 @@ extern void unlock_new_inode(struct inode *); extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); -extern void clear_inode(struct inode *); extern void end_writeback(struct inode *); extern void destroy_inode(struct inode *); extern void 
__destroy_inode(struct inode *); -- cgit v1.2.3-70-g09d2 From 45321ac54316eaeeebde0b5f728a1791e500974c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 13:43:19 -0400 Subject: Make ->drop_inode() just return whether inode needs to be dropped ... and let iput_final() do the actual eviction or retention Signed-off-by: Al Viro --- drivers/staging/pohmelfs/inode.c | 4 +- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 11 ++-- fs/cifs/cifsfs.c | 9 ++-- fs/gfs2/super.c | 4 +- fs/inode.c | 113 ++++++++++++--------------------------- fs/logfs/inode.c | 4 +- fs/ocfs2/inode.c | 8 +-- fs/ocfs2/inode.h | 2 +- include/linux/fs.h | 6 +-- 10 files changed, 60 insertions(+), 103 deletions(-) diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c index e818f53ccfd..100e3a3c1b1 100644 --- a/drivers/staging/pohmelfs/inode.c +++ b/drivers/staging/pohmelfs/inode.c @@ -1223,7 +1223,7 @@ void pohmelfs_fill_inode(struct inode *inode, struct netfs_inode_info *info) } } -static void pohmelfs_drop_inode(struct inode *inode) +static int pohmelfs_drop_inode(struct inode *inode) { struct pohmelfs_sb *psb = POHMELFS_SB(inode->i_sb); struct pohmelfs_inode *pi = POHMELFS_I(inode); @@ -1232,7 +1232,7 @@ static void pohmelfs_drop_inode(struct inode *inode) list_del_init(&pi->inode_entry); spin_unlock(&psb->ino_lock); - generic_drop_inode(inode); + return generic_drop_inode(inode); } static struct pohmelfs_inode *pohmelfs_get_inode_from_list(struct pohmelfs_sb *psb, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 394d5422ab6..eaf286abad1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2395,7 +2395,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); void btrfs_dirty_inode(struct inode *inode); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); -void btrfs_drop_inode(struct inode *inode); +int btrfs_drop_inode(struct inode *inode); int btrfs_init_cachep(void); void 
btrfs_destroy_cachep(void); long btrfs_ioctl_trans_end(struct file *file); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ce02199ec4e..2c54f04a0bf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3943,7 +3943,7 @@ again: if (atomic_read(&inode->i_count) > 1) d_prune_aliases(inode); /* - * btrfs_drop_inode will remove it from + * btrfs_drop_inode will have it removed from * the inode cache when its usage count * hits zero. */ @@ -6337,13 +6337,14 @@ free: kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } -void btrfs_drop_inode(struct inode *inode) +int btrfs_drop_inode(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; - if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) - generic_delete_inode(inode); + + if (btrfs_root_refs(&root->root_item) == 0) + return 1; else - generic_drop_inode(inode); + return generic_drop_inode(inode); } static void init_once(void *foo) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8a2cf129e53..20914f5627d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -480,14 +480,13 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data) return 0; } -void cifs_drop_inode(struct inode *inode) +static int cifs_drop_inode(struct inode *inode) { struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) - return generic_drop_inode(inode); - - return generic_delete_inode(inode); + /* no serverino => unconditional eviction */ + return !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) || + generic_drop_inode(inode); } static const struct super_operations cifs_super_ops = { diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 555f5a417c6..fa865ab37f1 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1191,7 +1191,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) * node for later deallocation. 
*/ -static void gfs2_drop_inode(struct inode *inode) +static int gfs2_drop_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); @@ -1200,7 +1200,7 @@ static void gfs2_drop_inode(struct inode *inode) if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) clear_nlink(inode); } - generic_drop_inode(inode); + return generic_drop_inode(inode); } static int is_ancestor(const struct dentry *d1, const struct dentry *d2) diff --git a/fs/inode.c b/fs/inode.c index 82ca3562a68..0e077619cbf 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1183,58 +1183,51 @@ void remove_inode_hash(struct inode *inode) } EXPORT_SYMBOL(remove_inode_hash); +int generic_delete_inode(struct inode *inode) +{ + return 1; +} +EXPORT_SYMBOL(generic_delete_inode); + /* - * Tell the filesystem that this inode is no longer of any interest and should - * be completely destroyed. - * - * We leave the inode in the inode hash table until *after* the filesystem's - * ->delete_inode completes. This ensures that an iget (such as nfsd might - * instigate) will always find up-to-date information either in the hash or on - * disk. - * - * I_FREEING is set so that no-one will take a new reference to the inode while - * it is being deleted. + * Normal UNIX filesystem behaviour: delete the + * inode when the usage count drops to zero, and + * i_nlink is zero. 
*/ -void generic_delete_inode(struct inode *inode) +int generic_drop_inode(struct inode *inode) { - list_del_init(&inode->i_list); - list_del_init(&inode->i_sb_list); - WARN_ON(inode->i_state & I_NEW); - inode->i_state |= I_FREEING; - inodes_stat.nr_inodes--; - spin_unlock(&inode_lock); - - evict(inode); - - spin_lock(&inode_lock); - hlist_del_init(&inode->i_hash); - spin_unlock(&inode_lock); - wake_up_inode(inode); - BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); - destroy_inode(inode); + return !inode->i_nlink || hlist_unhashed(&inode->i_hash); } -EXPORT_SYMBOL(generic_delete_inode); +EXPORT_SYMBOL_GPL(generic_drop_inode); -/** - * generic_detach_inode - remove inode from inode lists - * @inode: inode to remove - * - * Remove inode from inode lists, write it if it's dirty. This is just an - * internal VFS helper exported for hugetlbfs. Do not use! +/* + * Called when we're dropping the last reference + * to an inode. * - * Returns 1 if inode should be completely destroyed. + * Call the FS "drop_inode()" function, defaulting to + * the legacy UNIX filesystem behaviour. If it tells + * us to evict inode, do so. Otherwise, retain inode + * in cache if fs is alive, sync and evict if fs is + * shutting down. 
*/ -static int generic_detach_inode(struct inode *inode) +static void iput_final(struct inode *inode) { struct super_block *sb = inode->i_sb; + const struct super_operations *op = inode->i_sb->s_op; + int drop; + + if (op && op->drop_inode) + drop = op->drop_inode(inode); + else + drop = generic_drop_inode(inode); - if (!hlist_unhashed(&inode->i_hash)) { + if (!drop) { if (!(inode->i_state & (I_DIRTY|I_SYNC))) list_move(&inode->i_list, &inode_unused); inodes_stat.nr_unused++; if (sb->s_flags & MS_ACTIVE) { spin_unlock(&inode_lock); - return 0; + return; } WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_WILL_FREE; @@ -1252,53 +1245,15 @@ static int generic_detach_inode(struct inode *inode) inode->i_state |= I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); - return 1; -} - -static void generic_forget_inode(struct inode *inode) -{ - if (!generic_detach_inode(inode)) - return; evict(inode); + spin_lock(&inode_lock); + hlist_del_init(&inode->i_hash); + spin_unlock(&inode_lock); wake_up_inode(inode); + BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); destroy_inode(inode); } -/* - * Normal UNIX filesystem behaviour: delete the - * inode when the usage count drops to zero, and - * i_nlink is zero. - */ -void generic_drop_inode(struct inode *inode) -{ - if (!inode->i_nlink) - generic_delete_inode(inode); - else - generic_forget_inode(inode); -} -EXPORT_SYMBOL_GPL(generic_drop_inode); - -/* - * Called when we're dropping the last reference - * to an inode. - * - * Call the FS "drop()" function, defaulting to - * the legacy UNIX filesystem behaviour.. - * - * NOTE! NOTE! NOTE! We're called with the inode lock - * held, and the drop function is supposed to release - * the lock! 
- */ -static inline void iput_final(struct inode *inode) -{ - const struct super_operations *op = inode->i_sb->s_op; - void (*drop)(struct inode *) = generic_drop_inode; - - if (op && op->drop_inode) - drop = op->drop_inode; - drop(inode); -} - /** * iput - put an inode * @inode: inode to put diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 78be674d95c..d8c71ece098 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -287,7 +287,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc) } /* called with inode_lock held */ -static void logfs_drop_inode(struct inode *inode) +static int logfs_drop_inode(struct inode *inode) { struct logfs_super *super = logfs_super(inode->i_sb); struct logfs_inode *li = logfs_inode(inode); @@ -295,7 +295,7 @@ static void logfs_drop_inode(struct inode *inode) spin_lock(&logfs_inode_lock); list_move(&li->li_freeing_list, &super->s_freeing_list); spin_unlock(&logfs_inode_lock); - generic_drop_inode(inode); + return generic_drop_inode(inode); } static void logfs_set_ino_generation(struct super_block *sb, diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index eb7fd07c90f..0492464916b 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1194,9 +1194,10 @@ void ocfs2_evict_inode(struct inode *inode) /* Called under inode_lock, with no more references on the * struct inode, so it's safe here to check the flags field * and to manipulate i_nlink without any other locks. 
*/ -void ocfs2_drop_inode(struct inode *inode) +int ocfs2_drop_inode(struct inode *inode) { struct ocfs2_inode_info *oi = OCFS2_I(inode); + int res; mlog_entry_void(); @@ -1204,11 +1205,12 @@ void ocfs2_drop_inode(struct inode *inode) (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags); if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) - generic_delete_inode(inode); + res = 1; else - generic_drop_inode(inode); + res = generic_drop_inode(inode); mlog_exit_void(); + return res; } /* diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 975eedd7b24..6de5a869db3 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -124,7 +124,7 @@ static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode) } void ocfs2_evict_inode(struct inode *inode); -void ocfs2_drop_inode(struct inode *inode); +int ocfs2_drop_inode(struct inode *inode); /* Flags for ocfs2_iget() */ #define OCFS2_FI_FLAG_SYSFILE 0x1 diff --git a/include/linux/fs.h b/include/linux/fs.h index 4eaa6b2e35d..8553adbda57 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1562,7 +1562,7 @@ struct super_operations { void (*dirty_inode) (struct inode *); int (*write_inode) (struct inode *, struct writeback_control *wbc); - void (*drop_inode) (struct inode *); + int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); @@ -2164,8 +2164,8 @@ extern void iput(struct inode *); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); -extern void generic_delete_inode(struct inode *inode); -extern void generic_drop_inode(struct inode *inode); +extern int generic_delete_inode(struct inode *inode); +extern int generic_drop_inode(struct inode *inode); extern struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), -- cgit v1.2.3-70-g09d2 From 
b57922d97fd6f79b6dbe6db0c4fd30d219fa08c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 14:34:48 -0400 Subject: convert remaining ->clear_inode() to ->evict_inode() Signed-off-by: Al Viro --- fs/9p/v9fs_vfs.h | 2 +- fs/9p/vfs_inode.c | 4 +++- fs/9p/vfs_super.c | 4 ++-- fs/afs/inode.c | 5 ++++- fs/afs/internal.h | 2 +- fs/afs/super.c | 2 +- fs/binfmt_misc.c | 5 +++-- fs/block_dev.c | 7 +++++-- fs/cifs/cifsfs.c | 6 ++++-- fs/coda/inode.c | 8 +++++--- fs/ecryptfs/super.c | 8 +++++--- fs/freevxfs/vxfs_extern.h | 2 +- fs/freevxfs/vxfs_inode.c | 8 +++++--- fs/freevxfs/vxfs_super.c | 2 +- fs/fuse/inode.c | 6 ++++-- fs/hfs/hfs_fs.h | 2 +- fs/hfs/inode.c | 4 +++- fs/hfs/super.c | 2 +- fs/hfsplus/super.c | 8 +++++--- fs/inode.c | 2 -- fs/jffs2/fs.c | 6 ++++-- fs/jffs2/os-linux.h | 2 +- fs/jffs2/super.c | 2 +- fs/jffs2/xattr.c | 2 +- fs/nfs/inode.c | 13 +++++++++++-- fs/nfs/internal.h | 4 ++-- fs/nfs/super.c | 4 ++-- fs/ntfs/inode.c | 7 +++++-- fs/ntfs/inode.h | 2 +- fs/ntfs/super.c | 2 +- fs/ocfs2/dlmfs/dlmfs.c | 7 +++---- fs/xfs/linux-2.6/xfs_super.c | 8 +++++--- fs/xfs/linux-2.6/xfs_trace.h | 2 +- include/linux/fs.h | 3 +-- 34 files changed, 94 insertions(+), 59 deletions(-) diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 32ef4009d03..3d056fe01b5 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -52,7 +52,7 @@ void v9fs_destroy_inode(struct inode *inode); #endif struct inode *v9fs_get_inode(struct super_block *sb, int mode); -void v9fs_clear_inode(struct inode *inode); +void v9fs_evict_inode(struct inode *inode); ino_t v9fs_qid2ino(struct p9_qid *qid); void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); int v9fs_dir_release(struct inode *inode, struct file *filp); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 4b3ad6ac9a4..b81ce206508 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -387,8 +387,10 @@ error: * @inode: inode to release * */ -void v9fs_clear_inode(struct inode *inode) +void 
v9fs_evict_inode(struct inode *inode) { + truncate_inode_pages(inode->i_mapping, 0); + end_writeback(inode); filemap_fdatawrite(inode->i_mapping); #ifdef CONFIG_9P_FSCACHE diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index be74d020436..c6122bf547d 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -257,7 +257,7 @@ static const struct super_operations v9fs_super_ops = { .destroy_inode = v9fs_destroy_inode, #endif .statfs = simple_statfs, - .clear_inode = v9fs_clear_inode, + .evict_inode = v9fs_evict_inode, .show_options = generic_show_options, .umount_begin = v9fs_umount_begin, }; @@ -268,7 +268,7 @@ static const struct super_operations v9fs_super_ops_dotl = { .destroy_inode = v9fs_destroy_inode, #endif .statfs = v9fs_statfs, - .clear_inode = v9fs_clear_inode, + .evict_inode = v9fs_evict_inode, .show_options = generic_show_options, .umount_begin = v9fs_umount_begin, }; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index d00b312e311..320ffef1157 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -316,7 +316,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, /* * clear an AFS inode */ -void afs_clear_inode(struct inode *inode) +void afs_evict_inode(struct inode *inode) { struct afs_permits *permits; struct afs_vnode *vnode; @@ -335,6 +335,9 @@ void afs_clear_inode(struct inode *inode) ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode); + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); + afs_give_up_callback(vnode); if (vnode->server) { diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 5f679b77ce2..8679089ce9a 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -565,7 +565,7 @@ extern void afs_zap_data(struct afs_vnode *); extern int afs_validate(struct afs_vnode *, struct key *); extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int afs_setattr(struct dentry *, struct iattr *); -extern void afs_clear_inode(struct inode *); +extern void afs_evict_inode(struct inode *); /* * main.c 
diff --git a/fs/afs/super.c b/fs/afs/super.c index e932e5a3a0c..9cf80f02da1 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -49,7 +49,7 @@ static const struct super_operations afs_super_ops = { .statfs = afs_statfs, .alloc_inode = afs_alloc_inode, .destroy_inode = afs_destroy_inode, - .clear_inode = afs_clear_inode, + .evict_inode = afs_evict_inode, .put_super = afs_put_super, .show_options = generic_show_options, }; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index c4e83537ead..9e60fd20171 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -502,8 +502,9 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) return inode; } -static void bm_clear_inode(struct inode *inode) +static void bm_evict_inode(struct inode *inode) { + end_writeback(inode); kfree(inode->i_private); } @@ -685,7 +686,7 @@ static const struct file_operations bm_status_operations = { static const struct super_operations s_ops = { .statfs = simple_statfs, - .clear_inode = bm_clear_inode, + .evict_inode = bm_evict_inode, }; static int bm_fill_super(struct super_block * sb, void * data, int silent) diff --git a/fs/block_dev.c b/fs/block_dev.c index 63c9d607620..de7b4d0c7e3 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -426,10 +426,13 @@ static inline void __bd_forget(struct inode *inode) inode->i_mapping = &inode->i_data; } -static void bdev_clear_inode(struct inode *inode) +static void bdev_evict_inode(struct inode *inode) { struct block_device *bdev = &BDEV_I(inode)->bdev; struct list_head *p; + truncate_inode_pages(&inode->i_data, 0); + invalidate_inode_buffers(inode); /* is it needed here? 
*/ + end_writeback(inode); spin_lock(&bdev_lock); while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { __bd_forget(list_entry(p, struct inode, i_devices)); @@ -443,7 +446,7 @@ static const struct super_operations bdev_sops = { .alloc_inode = bdev_alloc_inode, .destroy_inode = bdev_destroy_inode, .drop_inode = generic_delete_inode, - .clear_inode = bdev_clear_inode, + .evict_inode = bdev_evict_inode, }; static int bd_get_sb(struct file_system_type *fs_type, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 20914f5627d..5574a42b7bb 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -330,8 +330,10 @@ cifs_destroy_inode(struct inode *inode) } static void -cifs_clear_inode(struct inode *inode) +cifs_evict_inode(struct inode *inode) { + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); cifs_fscache_release_inode_cookie(inode); } @@ -495,7 +497,7 @@ static const struct super_operations cifs_super_ops = { .alloc_inode = cifs_alloc_inode, .destroy_inode = cifs_destroy_inode, .drop_inode = cifs_drop_inode, - .clear_inode = cifs_clear_inode, + .evict_inode = cifs_evict_inode, /* .delete_inode = cifs_delete_inode, */ /* Do not need above function unless later we add lazy close of inodes or unless the kernel forgets to call us with the same number of releases (closes) diff --git a/fs/coda/inode.c b/fs/coda/inode.c index d97f9935a02..6526e6f21ec 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -35,7 +35,7 @@ #include "coda_int.h" /* VFS super_block ops */ -static void coda_clear_inode(struct inode *); +static void coda_evict_inode(struct inode *); static void coda_put_super(struct super_block *); static int coda_statfs(struct dentry *dentry, struct kstatfs *buf); @@ -93,7 +93,7 @@ static const struct super_operations coda_super_operations = { .alloc_inode = coda_alloc_inode, .destroy_inode = coda_destroy_inode, - .clear_inode = coda_clear_inode, + .evict_inode = coda_evict_inode, .put_super = coda_put_super, .statfs = coda_statfs, .remount_fs 
= coda_remount, @@ -224,8 +224,10 @@ static void coda_put_super(struct super_block *sb) printk("Coda: Bye bye.\n"); } -static void coda_clear_inode(struct inode *inode) +static void coda_evict_inode(struct inode *inode) { + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); coda_cache_clear_inode(inode); } diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 0435886e4a9..4b5de6c6e0f 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -122,7 +122,7 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) } /** - * ecryptfs_clear_inode + * ecryptfs_evict_inode * @inode - The ecryptfs inode * * Called by iput() when the inode reference count reached zero @@ -131,8 +131,10 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) * on the inode free list. We use this to drop out reference to the * lower inode. */ -static void ecryptfs_clear_inode(struct inode *inode) +static void ecryptfs_evict_inode(struct inode *inode) { + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); iput(ecryptfs_inode_to_lower(inode)); } @@ -184,6 +186,6 @@ const struct super_operations ecryptfs_sops = { .drop_inode = generic_delete_inode, .statfs = ecryptfs_statfs, .remount_fs = NULL, - .clear_inode = ecryptfs_clear_inode, + .evict_inode = ecryptfs_evict_inode, .show_options = ecryptfs_show_options }; diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h index 50ab5eecb99..881aa3d217f 100644 --- a/fs/freevxfs/vxfs_extern.h +++ b/fs/freevxfs/vxfs_extern.h @@ -63,7 +63,7 @@ extern void vxfs_put_fake_inode(struct inode *); extern struct vxfs_inode_info * vxfs_blkiget(struct super_block *, u_long, ino_t); extern struct vxfs_inode_info * vxfs_stiget(struct super_block *, ino_t); extern struct inode * vxfs_iget(struct super_block *, ino_t); -extern void vxfs_clear_inode(struct inode *); +extern void vxfs_evict_inode(struct inode *); /* vxfs_lookup.c */ extern const struct inode_operations 
vxfs_dir_inode_ops; diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 03a6ea5e99f..79d1b4ea13e 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -337,15 +337,17 @@ vxfs_iget(struct super_block *sbp, ino_t ino) } /** - * vxfs_clear_inode - remove inode from main memory + * vxfs_evict_inode - remove inode from main memory * @ip: inode to discard. * * Description: - * vxfs_clear_inode() is called on the final iput and frees the private + * vxfs_evict_inode() is called on the final iput and frees the private * inode area. */ void -vxfs_clear_inode(struct inode *ip) +vxfs_evict_inode(struct inode *ip) { + truncate_inode_pages(&ip->i_data, 0); + end_writeback(ip); kmem_cache_free(vxfs_inode_cachep, ip->i_private); } diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 1e8af939b3e..1f3ffd93b35 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -61,7 +61,7 @@ static int vxfs_statfs(struct dentry *, struct kstatfs *); static int vxfs_remount(struct super_block *, int *, char *); static const struct super_operations vxfs_super_ops = { - .clear_inode = vxfs_clear_inode, + .evict_inode = vxfs_evict_inode, .put_super = vxfs_put_super, .statfs = vxfs_statfs, .remount_fs = vxfs_remount, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ec14d19ce50..da9e6e11374 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -122,8 +122,10 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, fuse_request_send_noreply(fc, req); } -static void fuse_clear_inode(struct inode *inode) +static void fuse_evict_inode(struct inode *inode) { + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); if (inode->i_sb->s_flags & MS_ACTIVE) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -736,7 +738,7 @@ static const struct export_operations fuse_export_operations = { static const struct super_operations fuse_super_operations = { .alloc_inode = 
fuse_alloc_inode, .destroy_inode = fuse_destroy_inode, - .clear_inode = fuse_clear_inode, + .evict_inode = fuse_evict_inode, .drop_inode = generic_delete_inode, .remount_fs = fuse_remount_fs, .put_super = fuse_put_super, diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index fe35e3b626c..4f55651aaa5 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -193,7 +193,7 @@ extern int hfs_inode_setattr(struct dentry *, struct iattr *); extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, __be32 log_size, __be32 phys_size, u32 clump_size); extern struct inode *hfs_iget(struct super_block *, struct hfs_cat_key *, hfs_cat_rec *); -extern void hfs_clear_inode(struct inode *); +extern void hfs_evict_inode(struct inode *); extern void hfs_delete_inode(struct inode *); /* attr.c */ diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 93ceec8fbb8..397b7adc7ce 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -531,8 +531,10 @@ out: return NULL; } -void hfs_clear_inode(struct inode *inode) +void hfs_evict_inode(struct inode *inode) { + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) { HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL; iput(HFS_I(inode)->rsrc_inode); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 0a81eb7111f..34235d4bf08 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -181,7 +181,7 @@ static const struct super_operations hfs_super_operations = { .alloc_inode = hfs_alloc_inode, .destroy_inode = hfs_destroy_inode, .write_inode = hfs_write_inode, - .clear_inode = hfs_clear_inode, + .evict_inode = hfs_evict_inode, .put_super = hfs_put_super, .write_super = hfs_write_super, .sync_fs = hfs_sync_fs, diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index a32c241e4e4..3b55c050c74 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -145,9 +145,11 @@ static int hfsplus_write_inode(struct inode *inode, return ret; } -static void hfsplus_clear_inode(struct inode 
*inode) +static void hfsplus_evict_inode(struct inode *inode) { - dprint(DBG_INODE, "hfsplus_clear_inode: %lu\n", inode->i_ino); + dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino); + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); if (HFSPLUS_IS_RSRC(inode)) { HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL; iput(HFSPLUS_I(inode).rsrc_inode); @@ -293,7 +295,7 @@ static const struct super_operations hfsplus_sops = { .alloc_inode = hfsplus_alloc_inode, .destroy_inode = hfsplus_destroy_inode, .write_inode = hfsplus_write_inode, - .clear_inode = hfsplus_clear_inode, + .evict_inode = hfsplus_evict_inode, .put_super = hfsplus_put_super, .write_super = hfsplus_write_super, .sync_fs = hfsplus_sync_fs, diff --git a/fs/inode.c b/fs/inode.c index 0e077619cbf..5daeb0b8fb5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -317,8 +317,6 @@ static void evict(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); invalidate_inode_buffers(inode); end_writeback(inode); - if (op->clear_inode) - op->clear_inode(inode); } if (S_ISBLK(inode->i_mode) && inode->i_bdev) bd_forget(inode); diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 1b2426604fe..ac0638f0496 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -225,7 +225,7 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf) } -void jffs2_clear_inode (struct inode *inode) +void jffs2_evict_inode (struct inode *inode) { /* We can forget about this inode for now - drop all * the nodelists associated with it, etc. 
@@ -233,7 +233,9 @@ void jffs2_clear_inode (struct inode *inode) struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); - D1(printk(KERN_DEBUG "jffs2_clear_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode)); + D1(printk(KERN_DEBUG "jffs2_evict_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode)); + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); jffs2_do_clear_inode(c, f); } diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 4791aacf308..00bae7cc2e4 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -171,7 +171,7 @@ extern const struct inode_operations jffs2_symlink_inode_operations; int jffs2_setattr (struct dentry *, struct iattr *); int jffs2_do_setattr (struct inode *, struct iattr *); struct inode *jffs2_iget(struct super_block *, unsigned long); -void jffs2_clear_inode (struct inode *); +void jffs2_evict_inode (struct inode *); void jffs2_dirty_inode(struct inode *inode); struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 511e2d609d1..662bba09950 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -135,7 +135,7 @@ static const struct super_operations jffs2_super_operations = .write_super = jffs2_write_super, .statfs = jffs2_statfs, .remount_fs = jffs2_remount_fs, - .clear_inode = jffs2_clear_inode, + .evict_inode = jffs2_evict_inode, .dirty_inode = jffs2_dirty_inode, .sync_fs = jffs2_sync_fs, }; diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index d258e261bdc..9b572ca40a4 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -588,7 +588,7 @@ static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *re void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) { - /* It's called from jffs2_clear_inode() on inode removing. + /* It's called from jffs2_evict_inode() on inode removing. 
When an inode with XATTR is removed, those XATTRs must be removed. */ struct jffs2_xattr_ref *ref, *_ref; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 099b3518fee..c211b8168e5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -98,7 +98,7 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -void nfs_clear_inode(struct inode *inode) +static void nfs_clear_inode(struct inode *inode) { /* * The following should never happen... @@ -110,6 +110,13 @@ void nfs_clear_inode(struct inode *inode) nfs_fscache_release_inode_cookie(inode); } +void nfs_evict_inode(struct inode *inode) +{ + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); + nfs_clear_inode(inode); +} + /** * nfs_sync_mapping - helper to flush all mmapped dirty data to disk */ @@ -1338,8 +1345,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) * to open() calls that passed nfs_atomic_lookup, but failed to call * nfs_open(). */ -void nfs4_clear_inode(struct inode *inode) +void nfs4_evict_inode(struct inode *inode) { + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); /* If we are holding a delegation, return it! 
*/ nfs_inode_return_delegation_noreclaim(inode); /* First call standard NFS clear_inode() code */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e70f44b9b3f..f168ebdf7c6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -213,9 +213,9 @@ extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); extern int nfs_write_inode(struct inode *, struct writeback_control *); -extern void nfs_clear_inode(struct inode *); +extern void nfs_evict_inode(struct inode *); #ifdef CONFIG_NFS_V4 -extern void nfs4_clear_inode(struct inode *); +extern void nfs4_evict_inode(struct inode *); #endif void nfs_zap_acl_cache(struct inode *inode); extern int nfs_wait_bit_killable(void *word); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f9df16de4a5..ef2b7e468a7 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -270,7 +270,7 @@ static const struct super_operations nfs_sops = { .write_inode = nfs_write_inode, .put_super = nfs_put_super, .statfs = nfs_statfs, - .clear_inode = nfs_clear_inode, + .evict_inode = nfs_evict_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, .show_stats = nfs_show_stats, @@ -340,7 +340,7 @@ static const struct super_operations nfs4_sops = { .write_inode = nfs_write_inode, .put_super = nfs_put_super, .statfs = nfs_statfs, - .clear_inode = nfs4_clear_inode, + .evict_inode = nfs4_evict_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, .show_stats = nfs_show_stats, diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index fdef8f729c3..93622b175fc 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2238,7 +2238,7 @@ void ntfs_clear_extent_inode(ntfs_inode *ni) } /** - * ntfs_clear_big_inode - clean up the ntfs specific part of an inode + * ntfs_evict_big_inode - clean up the ntfs specific part of an inode * @vi: vfs inode pending annihilation * * When the VFS is going to remove an inode from memory, 
ntfs_clear_big_inode() @@ -2247,10 +2247,13 @@ void ntfs_clear_extent_inode(ntfs_inode *ni) * * If the MFT record is dirty, we commit it before doing anything else. */ -void ntfs_clear_big_inode(struct inode *vi) +void ntfs_evict_big_inode(struct inode *vi) { ntfs_inode *ni = NTFS_I(vi); + truncate_inode_pages(&vi->i_data, 0); + end_writeback(vi); + #ifdef NTFS_RW if (NInoDirty(ni)) { bool was_bad = (is_bad_inode(vi)); diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 9a113544605..2dabf813456 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -279,7 +279,7 @@ extern struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name, extern struct inode *ntfs_alloc_big_inode(struct super_block *sb); extern void ntfs_destroy_big_inode(struct inode *inode); -extern void ntfs_clear_big_inode(struct inode *vi); +extern void ntfs_evict_big_inode(struct inode *vi); extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni); diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 0de1db6cddb..512806171bf 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -2700,7 +2700,7 @@ static const struct super_operations ntfs_sops = { .put_super = ntfs_put_super, /* Syscall: umount. */ .statfs = ntfs_statfs, /* Syscall: statfs */ .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */ - .clear_inode = ntfs_clear_big_inode, /* VFS: Called when an inode is + .evict_inode = ntfs_evict_big_inode, /* VFS: Called when an inode is removed from memory. */ //.umount_begin = NULL, /* Forced umount. 
*/ .show_options = ntfs_show_options, /* Show mount options in diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 85e4ccaedd1..a43ebb11ad3 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -357,13 +357,12 @@ static void dlmfs_destroy_inode(struct inode *inode) kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); } -static void dlmfs_clear_inode(struct inode *inode) +static void dlmfs_evict_inode(struct inode *inode) { int status; struct dlmfs_inode_private *ip; - if (!inode) - return; + end_writeback(inode); mlog(0, "inode %lu\n", inode->i_ino); @@ -633,7 +632,7 @@ static const struct super_operations dlmfs_ops = { .statfs = simple_statfs, .alloc_inode = dlmfs_alloc_inode, .destroy_inode = dlmfs_destroy_inode, - .clear_inode = dlmfs_clear_inode, + .evict_inode = dlmfs_evict_inode, .drop_inode = generic_delete_inode, }; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 758df94690e..15c35b62ff1 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1100,13 +1100,15 @@ xfs_fs_write_inode( } STATIC void -xfs_fs_clear_inode( +xfs_fs_evict_inode( struct inode *inode) { xfs_inode_t *ip = XFS_I(inode); - trace_xfs_clear_inode(ip); + trace_xfs_evict_inode(ip); + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); XFS_STATS_INC(vn_rele); XFS_STATS_INC(vn_remove); XFS_STATS_DEC(vn_active); @@ -1622,7 +1624,7 @@ static const struct super_operations xfs_super_operations = { .destroy_inode = xfs_fs_destroy_inode, .dirty_inode = xfs_fs_dirty_inode, .write_inode = xfs_fs_write_inode, - .clear_inode = xfs_fs_clear_inode, + .evict_inode = xfs_fs_evict_inode, .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index c657cdca2cd..be5dffd282a 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -581,7 +581,7 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr); 
DEFINE_INODE_EVENT(xfs_file_fsync); DEFINE_INODE_EVENT(xfs_destroy_inode); DEFINE_INODE_EVENT(xfs_write_inode); -DEFINE_INODE_EVENT(xfs_clear_inode); +DEFINE_INODE_EVENT(xfs_evict_inode); DEFINE_INODE_EVENT(xfs_dquot_dqalloc); DEFINE_INODE_EVENT(xfs_dquot_dqdetach); diff --git a/include/linux/fs.h b/include/linux/fs.h index 8553adbda57..dec9ac59885 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1571,7 +1571,6 @@ struct super_operations { int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); - void (*clear_inode) (struct inode *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct vfsmount *); @@ -1616,7 +1615,7 @@ struct super_operations { * I_FREEING Set when inode is about to be freed but still has dirty * pages or buffers attached or the inode itself is still * dirty. - * I_CLEAR Added by clear_inode(). In this state the inode is clean + * I_CLEAR Added by end_writeback(). In this state the inode is clean * and can be destroyed. Inode keeps I_FREEING. 
* * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are -- cgit v1.2.3-70-g09d2 From b70a3e0702dee2ed9435e06a8bde7d9fa2228895 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Jun 2010 14:35:46 -0400 Subject: All filesystems that need invalidate_inode_buffers() are doing that explicitly Signed-off-by: Al Viro --- fs/inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index 5daeb0b8fb5..2575244640a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -315,7 +315,6 @@ static void evict(struct inode *inode) } else { if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); - invalidate_inode_buffers(inode); end_writeback(inode); } if (S_ISBLK(inode->i_mode) && inode->i_bdev) -- cgit v1.2.3-70-g09d2 From 336fb3b97b78edc65bae0b223b83bf676cfe29e2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Jun 2010 00:37:12 -0400 Subject: update VFS documentation for method changes. Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 22 ++++++++++++---------- Documentation/filesystems/porting | 27 +++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 96d4293607e..bbcc15651a2 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -92,8 +92,8 @@ prototypes: void (*destroy_inode)(struct inode *); void (*dirty_inode) (struct inode *); int (*write_inode) (struct inode *, int); - void (*drop_inode) (struct inode *); - void (*delete_inode) (struct inode *); + int (*drop_inode) (struct inode *); + void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); @@ -101,14 +101,13 @@ prototypes: int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); - void (*clear_inode) (struct inode *); void 
(*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct vfsmount *); ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); locking rules: - All may block. + All may block [not true, see below] None have BKL s_umount alloc_inode: @@ -116,22 +115,25 @@ destroy_inode: dirty_inode: (must not sleep) write_inode: drop_inode: !!!inode_lock!!! -delete_inode: +evict_inode: put_super: write write_super: read sync_fs: read freeze_fs: read unfreeze_fs: read -statfs: no -remount_fs: maybe (see below) -clear_inode: +statfs: maybe(read) (see below) +remount_fs: write umount_begin: no show_options: no (namespace_sem) quota_read: no (see below) quota_write: no (see below) -->remount_fs() will have the s_umount exclusive lock if it's already mounted. -When called from get_sb_single, it does NOT have the s_umount lock. +->statfs() has s_umount (shared) when called by ustat(2) (native or +compat), but that's an accident of bad API; s_umount is used to pin +the superblock down when we only have dev_t given us by userland to +identify the superblock. Everything else (statfs(), fstatfs(), etc.) +doesn't hold it when calling ->statfs() - superblock is pinned down +by resolving the pathname passed to syscall. ->quota_read() and ->quota_write() functions are both guaranteed to be the only ones operating on the quota file by the quota code (via dqio_sem) (unless an admin really wants to screw up something and diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index f9547a5c187..b12c8953868 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -291,3 +291,30 @@ be in order of zeroing blocks using block_truncate_page or similar helpers, size update and on finally on-disk truncation which should not fail. 
inode_change_ok now includes the size checks for ATTR_SIZE and must be called in the beginning of ->setattr unconditionally. + +[mandatory] + + ->clear_inode() and ->delete_inode() are gone; ->evict_inode() should +be used instead. It gets called whenever the inode is evicted, whether it has +remaining links or not. Caller does *not* evict the pagecache or inode-associated +metadata buffers; getting rid of those is the responsibility of the method, as it had +been for ->delete_inode(). + ->drop_inode() returns int now; it's called on final iput() with inode_lock +held and it returns true if the filesystem wants the inode to be dropped. As before, +generic_drop_inode() is still the default and it's been updated appropriately. +generic_delete_inode() is also alive and it consists simply of return 1. Note that +all actual eviction work is done by caller after ->drop_inode() returns. + clear_inode() is gone; use end_writeback() instead. As before, it must +be called exactly once on each call of ->evict_inode() (as it used to be for +each call of ->delete_inode()). Unlike before, if you are using inode-associated +metadata buffers (i.e. mark_buffer_dirty_inode()), it's your responsibility to +call invalidate_inode_buffers() before end_writeback(). + No async writeback (and thus no calls of ->write_inode()) will happen +after end_writeback() returns, so actions that should not overlap with ->write_inode() +(e.g. freeing on-disk inode if i_nlink is 0) ought to be done after that call. + + NOTE: checking i_nlink in the beginning of ->write_inode() and bailing out +if it's zero is not *and* *never* *had* *been* enough. Final unlink() and iput() +may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly +free the on-disk inode, you may end up doing that while ->write_inode() is writing +to it. 
-- cgit v1.2.3-70-g09d2 From ebabe9a9001af0af56c0c2780ca1576246e7a74b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 Jul 2010 18:53:11 +0200 Subject: pass a struct path to vfs_statfs We'll need the path to implement the flags field for statvfs support. We do have it available in all callers except: - ecryptfs_statfs. This one doesn't actually need vfs_statfs but just needs to make a call to the lower filesystem statfs method. - sys_ustat. Add a non-exported statfs_by_dentry helper for it which won't be able to fill out the flags field later on. In addition rename the helpers for statfs vs fstatfs to do_*statfs instead of the misleading vfs prefix. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- arch/alpha/kernel/osf_sys.c | 8 ++++---- arch/parisc/hpux/sys_hpux.c | 10 ++++----- fs/cachefiles/bind.c | 2 +- fs/cachefiles/daemon.c | 6 +++++- fs/compat.c | 10 ++++----- fs/ecryptfs/super.c | 6 +++++- fs/nfsd/nfs4xdr.c | 6 +++++- fs/nfsd/vfs.c | 10 +++++++-- fs/statfs.c | 50 +++++++++++++++++++++++---------------------- include/linux/fs.h | 3 ++- kernel/acct.c | 2 +- 11 files changed, 67 insertions(+), 46 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index de9d3971780..88131c6e42e 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -234,11 +234,11 @@ linux_to_osf_statfs(struct kstatfs *linux_stat, struct osf_statfs __user *osf_st } static int -do_osf_statfs(struct dentry * dentry, struct osf_statfs __user *buffer, +do_osf_statfs(struct path *path, struct osf_statfs __user *buffer, unsigned long bufsiz) { struct kstatfs linux_stat; - int error = vfs_statfs(dentry, &linux_stat); + int error = vfs_statfs(path, &linux_stat); if (!error) error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz); return error; @@ -252,7 +252,7 @@ SYSCALL_DEFINE3(osf_statfs, char __user *, pathname, retval = user_path(pathname, &path); if (!retval) { - retval = do_osf_statfs(path.dentry, buffer, 
bufsiz); + retval = do_osf_statfs(&path, buffer, bufsiz); path_put(&path); } return retval; @@ -267,7 +267,7 @@ SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd, retval = -EBADF; file = fget(fd); if (file) { - retval = do_osf_statfs(file->f_path.dentry, buffer, bufsiz); + retval = do_osf_statfs(&file->f_path, buffer, bufsiz); fput(file); } return retval; diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c index 92343bd35fa..ba430a03bc7 100644 --- a/arch/parisc/hpux/sys_hpux.c +++ b/arch/parisc/hpux/sys_hpux.c @@ -145,7 +145,7 @@ static int hpux_ustat(dev_t dev, struct hpux_ustat __user *ubuf) s = user_get_super(dev); if (s == NULL) goto out; - err = vfs_statfs(s->s_root, &sbuf); + err = statfs_by_dentry(s->s_root, &sbuf); drop_super(s); if (err) goto out; @@ -186,12 +186,12 @@ struct hpux_statfs { int16_t f_pad; }; -static int vfs_statfs_hpux(struct dentry *dentry, struct hpux_statfs *buf) +static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf) { struct kstatfs st; int retval; - retval = vfs_statfs(dentry, &st); + retval = vfs_statfs(path, &st); if (retval) return retval; @@ -219,7 +219,7 @@ asmlinkage long hpux_statfs(const char __user *pathname, error = user_path(pathname, &path); if (!error) { struct hpux_statfs tmp; - error = vfs_statfs_hpux(path.dentry, &tmp); + error = do_statfs_hpux(&path, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; path_put(&path); @@ -237,7 +237,7 @@ asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf) file = fget(fd); if (!file) goto out; - error = vfs_statfs_hpux(file->f_path.dentry, &tmp); + error = do_statfs_hpux(&file->f_path, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; fput(file); diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index 2906077ac79..a2603e7c0bb 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c @@ -146,7 +146,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache 
*cache) goto error_unsupported; /* get the cache size and blocksize */ - ret = vfs_statfs(root, &stats); + ret = vfs_statfs(&path, &stats); if (ret < 0) goto error_unsupported; diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index c2413561ea7..24eb0d37241 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -683,6 +683,10 @@ int cachefiles_has_space(struct cachefiles_cache *cache, unsigned fnr, unsigned bnr) { struct kstatfs stats; + struct path path = { + .mnt = cache->mnt, + .dentry = cache->mnt->mnt_root, + }; int ret; //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", @@ -697,7 +701,7 @@ int cachefiles_has_space(struct cachefiles_cache *cache, /* find out how many pages of blockdev are available */ memset(&stats, 0, sizeof(stats)); - ret = vfs_statfs(cache->mnt->mnt_root, &stats); + ret = vfs_statfs(&path, &stats); if (ret < 0) { if (ret == -EIO) cachefiles_io_error(cache, "statfs failed"); diff --git a/fs/compat.c b/fs/compat.c index 6490d2134ff..fc6c2adf2f6 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -266,7 +266,7 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta error = user_path(pathname, &path); if (!error) { struct kstatfs tmp; - error = vfs_statfs(path.dentry, &tmp); + error = vfs_statfs(&path, &tmp); if (!error) error = put_compat_statfs(buf, &tmp); path_put(&path); @@ -284,7 +284,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user file = fget(fd); if (!file) goto out; - error = vfs_statfs(file->f_path.dentry, &tmp); + error = vfs_statfs(&file->f_path, &tmp); if (!error) error = put_compat_statfs(buf, &tmp); fput(file); @@ -334,7 +334,7 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s error = user_path(pathname, &path); if (!error) { struct kstatfs tmp; - error = vfs_statfs(path.dentry, &tmp); + error = vfs_statfs(&path, &tmp); if (!error) error = put_compat_statfs64(buf, &tmp); path_put(&path); @@ -355,7 +355,7 @@ 
asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c file = fget(fd); if (!file) goto out; - error = vfs_statfs(file->f_path.dentry, &tmp); + error = vfs_statfs(&file->f_path, &tmp); if (!error) error = put_compat_statfs64(buf, &tmp); fput(file); @@ -378,7 +378,7 @@ asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u) sb = user_get_super(new_decode_dev(dev)); if (!sb) return -EINVAL; - err = vfs_statfs(sb->s_root, &sbuf); + err = statfs_by_dentry(sb->s_root, &sbuf); drop_super(sb); if (err) return err; diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 4b5de6c6e0f..f7fc286a3aa 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -118,7 +118,11 @@ void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode) */ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) { - return vfs_statfs(ecryptfs_dentry_to_lower(dentry), buf); + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + + if (!lower_dentry->d_sb->s_op->statfs) + return -ENOSYS; + return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf); } /** diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ac17a708023..4d6154f66e0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1756,6 +1756,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, struct nfs4_acl *acl = NULL; struct nfsd4_compoundres *resp = rqstp->rq_resp; u32 minorversion = resp->cstate.minorversion; + struct path path = { + .mnt = exp->ex_path.mnt, + .dentry = dentry, + }; BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); @@ -1776,7 +1780,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, FATTR4_WORD0_MAXNAME)) || (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL))) { - err = vfs_statfs(dentry, &statfs); + err = vfs_statfs(&path, &statfs); if (err) goto out_nfserr; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c 
index 3c111120b61..f6f1a718642 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -2019,8 +2019,14 @@ out: __be32 nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) { - __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); - if (!err && vfs_statfs(fhp->fh_dentry,stat)) + struct path path = { + .mnt = fhp->fh_export->ex_path.mnt, + .dentry = fhp->fh_dentry, + }; + __be32 err; + + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); + if (!err && vfs_statfs(&path, stat)) err = nfserr_io; return err; } diff --git a/fs/statfs.c b/fs/statfs.c index 4ef021f3b61..6a305709a4d 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -7,33 +7,35 @@ #include #include -int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) +int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf) { - int retval = -ENODEV; - - if (dentry) { - retval = -ENOSYS; - if (dentry->d_sb->s_op->statfs) { - memset(buf, 0, sizeof(*buf)); - retval = security_sb_statfs(dentry); - if (retval) - return retval; - retval = dentry->d_sb->s_op->statfs(dentry, buf); - if (retval == 0 && buf->f_frsize == 0) - buf->f_frsize = buf->f_bsize; - } - } + int retval; + + if (!dentry->d_sb->s_op->statfs) + return -ENOSYS; + + memset(buf, 0, sizeof(*buf)); + retval = security_sb_statfs(dentry); + if (retval) + return retval; + retval = dentry->d_sb->s_op->statfs(dentry, buf); + if (retval == 0 && buf->f_frsize == 0) + buf->f_frsize = buf->f_bsize; return retval; } +int vfs_statfs(struct path *path, struct kstatfs *buf) +{ + return statfs_by_dentry(path->dentry, buf); +} EXPORT_SYMBOL(vfs_statfs); -static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) +static int do_statfs_native(struct path *path, struct statfs *buf) { struct kstatfs st; int retval; - retval = vfs_statfs(dentry, &st); + retval = vfs_statfs(path, &st); if (retval) return retval; @@ -72,12 +74,12 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) return 0; } -static int 
vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) +static int do_statfs64(struct path *path, struct statfs64 *buf) { struct kstatfs st; int retval; - retval = vfs_statfs(dentry, &st); + retval = vfs_statfs(path, &st); if (retval) return retval; @@ -107,7 +109,7 @@ SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, b error = user_path(pathname, &path); if (!error) { struct statfs tmp; - error = vfs_statfs_native(path.dentry, &tmp); + error = do_statfs_native(&path, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; path_put(&path); @@ -125,7 +127,7 @@ SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct stat error = user_path(pathname, &path); if (!error) { struct statfs64 tmp; - error = vfs_statfs64(path.dentry, &tmp); + error = do_statfs64(&path, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; path_put(&path); @@ -143,7 +145,7 @@ SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) file = fget(fd); if (!file) goto out; - error = vfs_statfs_native(file->f_path.dentry, &tmp); + error = do_statfs_native(&file->f_path, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; fput(file); @@ -164,7 +166,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user file = fget(fd); if (!file) goto out; - error = vfs_statfs64(file->f_path.dentry, &tmp); + error = do_statfs64(&file->f_path, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; fput(file); @@ -183,7 +185,7 @@ SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) if (!s) return -EINVAL; - err = vfs_statfs(s->s_root, &sbuf); + err = statfs_by_dentry(s->s_root, &sbuf); drop_super(s); if (err) return err; diff --git a/include/linux/fs.h b/include/linux/fs.h index dec9ac59885..9bedf4219f8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1813,7 +1813,8 @@ extern struct vfsmount 
*collect_mounts(struct path *); extern void drop_collected_mounts(struct vfsmount *); extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, struct vfsmount *); -extern int vfs_statfs(struct dentry *, struct kstatfs *); +extern int vfs_statfs(struct path *, struct kstatfs *); +extern int statfs_by_dentry(struct dentry *, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); diff --git a/kernel/acct.c b/kernel/acct.c index 385b88461c2..fa7eb3de2dd 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -122,7 +122,7 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file) spin_unlock(&acct_lock); /* May block */ - if (vfs_statfs(file->f_path.dentry, &sbuf)) + if (vfs_statfs(&file->f_path, &sbuf)) return res; suspend = sbuf.f_blocks * SUSPEND; resume = sbuf.f_blocks * RESUME; -- cgit v1.2.3-70-g09d2 From 365b18189789bfa1acd9939e6312b8a4b4577b28 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 Jul 2010 18:53:25 +0200 Subject: add f_flags to struct statfs(64) Add a flags field to help glibc implementing statvfs(3) efficiently. We copy the flag values from glibc, and add a new ST_VALID flag to denote that f_flags is implemented. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- arch/mips/include/asm/statfs.h | 12 +++++++---- arch/s390/include/asm/statfs.h | 9 +++++--- fs/statfs.c | 47 +++++++++++++++++++++++++++++++++++++++++- include/asm-generic/statfs.h | 9 +++++--- include/linux/statfs.h | 25 ++++++++++++++++++++-- 5 files changed, 89 insertions(+), 13 deletions(-) diff --git a/arch/mips/include/asm/statfs.h b/arch/mips/include/asm/statfs.h index c3ddf973c1c..0f805c7a42a 100644 --- a/arch/mips/include/asm/statfs.h +++ b/arch/mips/include/asm/statfs.h @@ -33,7 +33,8 @@ struct statfs { /* Linux specials */ __kernel_fsid_t f_fsid; long f_namelen; - long f_spare[6]; + long f_flags; + long f_spare[5]; }; #if (_MIPS_SIM == _MIPS_SIM_ABI32) || (_MIPS_SIM == _MIPS_SIM_NABI32) @@ -53,7 +54,8 @@ struct statfs64 { __u64 f_bavail; __kernel_fsid_t f_fsid; __u32 f_namelen; - __u32 f_spare[6]; + __u32 f_flags; + __u32 f_spare[5]; }; #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ @@ -73,7 +75,8 @@ struct statfs64 { /* Same as struct statfs */ /* Linux specials */ __kernel_fsid_t f_fsid; long f_namelen; - long f_spare[6]; + long f_flags; + long f_spare[5]; }; struct compat_statfs64 { @@ -88,7 +91,8 @@ struct compat_statfs64 { __u64 f_bavail; __kernel_fsid_t f_fsid; __u32 f_namelen; - __u32 f_spare[6]; + __u32 f_flags; + __u32 f_spare[5]; }; #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ diff --git a/arch/s390/include/asm/statfs.h b/arch/s390/include/asm/statfs.h index 06cc70307ec..3be7fbd406c 100644 --- a/arch/s390/include/asm/statfs.h +++ b/arch/s390/include/asm/statfs.h @@ -33,7 +33,8 @@ struct statfs { __kernel_fsid_t f_fsid; int f_namelen; int f_frsize; - int f_spare[5]; + int f_flags; + int f_spare[4]; }; struct statfs64 { @@ -47,7 +48,8 @@ struct statfs64 { __kernel_fsid_t f_fsid; int f_namelen; int f_frsize; - int f_spare[5]; + int f_flags; + int f_spare[4]; }; struct compat_statfs64 { @@ -61,7 +63,8 @@ struct compat_statfs64 { __kernel_fsid_t f_fsid; __u32 f_namelen; __u32 f_frsize; - __u32 
f_spare[5]; + __u32 f_flags; + __u32 f_spare[4]; }; #endif /* __s390x__ */ diff --git a/fs/statfs.c b/fs/statfs.c index 6a305709a4d..30ea8c8a996 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -2,11 +2,49 @@ #include #include #include +#include #include #include #include #include +static int flags_by_mnt(int mnt_flags) +{ + int flags = 0; + + if (mnt_flags & MNT_READONLY) + flags |= ST_RDONLY; + if (mnt_flags & MNT_NOSUID) + flags |= ST_NOSUID; + if (mnt_flags & MNT_NODEV) + flags |= ST_NODEV; + if (mnt_flags & MNT_NOEXEC) + flags |= ST_NOEXEC; + if (mnt_flags & MNT_NOATIME) + flags |= ST_NOATIME; + if (mnt_flags & MNT_NODIRATIME) + flags |= ST_NODIRATIME; + if (mnt_flags & MNT_RELATIME) + flags |= ST_RELATIME; + return flags; +} + +static int flags_by_sb(int s_flags) +{ + int flags = 0; + if (s_flags & MS_SYNCHRONOUS) + flags |= ST_SYNCHRONOUS; + if (s_flags & MS_MANDLOCK) + flags |= ST_MANDLOCK; + return flags; +} + +static int calculate_f_flags(struct vfsmount *mnt) +{ + return ST_VALID | flags_by_mnt(mnt->mnt_flags) | + flags_by_sb(mnt->mnt_sb->s_flags); +} + int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf) { int retval; @@ -26,7 +64,12 @@ int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf) int vfs_statfs(struct path *path, struct kstatfs *buf) { - return statfs_by_dentry(path->dentry, buf); + int error; + + error = statfs_by_dentry(path->dentry, buf); + if (!error) + buf->f_flags = calculate_f_flags(path->mnt); + return error; } EXPORT_SYMBOL(vfs_statfs); @@ -69,6 +112,7 @@ static int do_statfs_native(struct path *path, struct statfs *buf) buf->f_fsid = st.f_fsid; buf->f_namelen = st.f_namelen; buf->f_frsize = st.f_frsize; + buf->f_flags = st.f_flags; memset(buf->f_spare, 0, sizeof(buf->f_spare)); } return 0; @@ -96,6 +140,7 @@ static int do_statfs64(struct path *path, struct statfs64 *buf) buf->f_fsid = st.f_fsid; buf->f_namelen = st.f_namelen; buf->f_frsize = st.f_frsize; + buf->f_flags = st.f_flags; memset(buf->f_spare, 0, 
sizeof(buf->f_spare)); } return 0; diff --git a/include/asm-generic/statfs.h b/include/asm-generic/statfs.h index 3b4fb3e52f0..0fd28e028de 100644 --- a/include/asm-generic/statfs.h +++ b/include/asm-generic/statfs.h @@ -33,7 +33,8 @@ struct statfs { __kernel_fsid_t f_fsid; __statfs_word f_namelen; __statfs_word f_frsize; - __statfs_word f_spare[5]; + __statfs_word f_flags; + __statfs_word f_spare[4]; }; /* @@ -55,7 +56,8 @@ struct statfs64 { __kernel_fsid_t f_fsid; __statfs_word f_namelen; __statfs_word f_frsize; - __statfs_word f_spare[5]; + __statfs_word f_flags; + __statfs_word f_spare[4]; } ARCH_PACK_STATFS64; /* @@ -77,7 +79,8 @@ struct compat_statfs64 { __kernel_fsid_t f_fsid; __u32 f_namelen; __u32 f_frsize; - __u32 f_spare[5]; + __u32 f_flags; + __u32 f_spare[4]; } ARCH_PACK_COMPAT_STATFS64; #endif diff --git a/include/linux/statfs.h b/include/linux/statfs.h index b34cc829f98..0166d320a75 100644 --- a/include/linux/statfs.h +++ b/include/linux/statfs.h @@ -2,7 +2,6 @@ #define _LINUX_STATFS_H #include - #include struct kstatfs { @@ -16,7 +15,29 @@ struct kstatfs { __kernel_fsid_t f_fsid; long f_namelen; long f_frsize; - long f_spare[5]; + long f_flags; + long f_spare[4]; }; +/* + * Definitions for the flag in f_flag. + * + * Generally these flags are equivalent to the MS_ flags used in the mount + * ABI. The exception is ST_VALID which has the same value as MS_REMOUNT + * which doesn't make any sense for statfs. 
+ */ +#define ST_RDONLY 0x0001 /* mount read-only */ +#define ST_NOSUID 0x0002 /* ignore suid and sgid bits */ +#define ST_NODEV 0x0004 /* disallow access to device special files */ +#define ST_NOEXEC 0x0008 /* disallow program execution */ +#define ST_SYNCHRONOUS 0x0010 /* writes are synced at once */ +#define ST_VALID 0x0020 /* f_flags support is implemented */ +#define ST_MANDLOCK 0x0040 /* allow mandatory locks on an FS */ +/* 0x0080 used for ST_WRITE in glibc */ +/* 0x0100 used for ST_APPEND in glibc */ +/* 0x0200 used for ST_IMMUTABLE in glibc */ +#define ST_NOATIME 0x0400 /* do not update access times */ +#define ST_NODIRATIME 0x0800 /* do not update directory access times */ +#define ST_RELATIME 0x1000 /* update atime relative to mtime/ctime */ + #endif -- cgit v1.2.3-70-g09d2 From 2aec7c523291621ebb68ba8e0bd9b52a26bb76ee Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 19 Jul 2010 18:19:41 +0200 Subject: mbcache: Remove unused features The mbcache code was written to support a variable number of indexes, but all the existing users use exactly one index. Simplify the code to support only that case. There are also no users of the cache entry free operation, and none of the users keep extra data in cache entries. Remove those features as well. 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- fs/ext2/xattr.c | 12 ++--- fs/ext3/xattr.c | 12 ++--- fs/ext4/xattr.c | 12 ++--- fs/mbcache.c | 141 ++++++++++++++---------------------------------- include/linux/mbcache.h | 20 ++----- 5 files changed, 60 insertions(+), 137 deletions(-) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 5ab87e6edff..8c29ae15129 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -843,7 +843,7 @@ ext2_xattr_cache_insert(struct buffer_head *bh) ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS); if (!ce) return -ENOMEM; - error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); + error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); if (error) { mb_cache_entry_free(ce); if (error == -EBUSY) { @@ -917,8 +917,8 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) return NULL; /* never share */ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); again: - ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, - inode->i_sb->s_bdev, hash); + ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev, + hash); while (ce) { struct buffer_head *bh; @@ -950,7 +950,7 @@ again: unlock_buffer(bh); brelse(bh); } - ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); + ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); } return NULL; } @@ -1026,9 +1026,7 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header, int __init init_ext2_xattr(void) { - ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, - sizeof(struct mb_cache_entry) + - sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); + ext2_xattr_cache = mb_cache_create("ext2_xattr", 6); if (!ext2_xattr_cache) return -ENOMEM; return 0; diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 71fb8d65e54..e69dc6dfaa8 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -1139,7 +1139,7 @@ ext3_xattr_cache_insert(struct buffer_head *bh) ea_bdebug(bh, "out of memory"); 
return; } - error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); + error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); if (error) { mb_cache_entry_free(ce); if (error == -EBUSY) { @@ -1211,8 +1211,8 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header, return NULL; /* never share */ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); again: - ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, - inode->i_sb->s_bdev, hash); + ce = mb_cache_entry_find_first(ext3_xattr_cache, inode->i_sb->s_bdev, + hash); while (ce) { struct buffer_head *bh; @@ -1237,7 +1237,7 @@ again: return bh; } brelse(bh); - ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); + ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); } return NULL; } @@ -1313,9 +1313,7 @@ static void ext3_xattr_rehash(struct ext3_xattr_header *header, int __init init_ext3_xattr(void) { - ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, - sizeof(struct mb_cache_entry) + - sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); + ext3_xattr_cache = mb_cache_create("ext3_xattr", 6); if (!ext3_xattr_cache) return -ENOMEM; return 0; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 04338009793..1c93198353e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1418,7 +1418,7 @@ ext4_xattr_cache_insert(struct buffer_head *bh) ea_bdebug(bh, "out of memory"); return; } - error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); + error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); if (error) { mb_cache_entry_free(ce); if (error == -EBUSY) { @@ -1490,8 +1490,8 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, return NULL; /* never share */ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); again: - ce = mb_cache_entry_find_first(ext4_xattr_cache, 0, - inode->i_sb->s_bdev, hash); + ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev, + 
hash); while (ce) { struct buffer_head *bh; @@ -1515,7 +1515,7 @@ again: return bh; } brelse(bh); - ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); + ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); } return NULL; } @@ -1591,9 +1591,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, int __init init_ext4_xattr(void) { - ext4_xattr_cache = mb_cache_create("ext4_xattr", NULL, - sizeof(struct mb_cache_entry) + - sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); + ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); if (!ext4_xattr_cache) return -ENOMEM; return 0; diff --git a/fs/mbcache.c b/fs/mbcache.c index e28f21b9534..8a2cbd82307 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -79,15 +79,11 @@ EXPORT_SYMBOL(mb_cache_entry_find_next); struct mb_cache { struct list_head c_cache_list; const char *c_name; - struct mb_cache_op c_op; atomic_t c_entry_count; int c_bucket_bits; -#ifndef MB_CACHE_INDEXES_COUNT - int c_indexes_count; -#endif - struct kmem_cache *c_entry_cache; + struct kmem_cache *c_entry_cache; struct list_head *c_block_hash; - struct list_head *c_indexes_hash[0]; + struct list_head *c_index_hash; }; @@ -101,16 +97,6 @@ static LIST_HEAD(mb_cache_list); static LIST_HEAD(mb_cache_lru_list); static DEFINE_SPINLOCK(mb_cache_spinlock); -static inline int -mb_cache_indexes(struct mb_cache *cache) -{ -#ifdef MB_CACHE_INDEXES_COUNT - return MB_CACHE_INDEXES_COUNT; -#else - return cache->c_indexes_count; -#endif -} - /* * What the mbcache registers as to get shrunk dynamically. 
*/ @@ -132,12 +118,9 @@ __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) static void __mb_cache_entry_unhash(struct mb_cache_entry *ce) { - int n; - if (__mb_cache_entry_is_hashed(ce)) { list_del_init(&ce->e_block_list); - for (n=0; n<mb_cache_indexes(ce->e_cache); n++) - list_del(&ce->e_indexes[n].o_list); + list_del(&ce->e_index.o_list); } } @@ -148,16 +131,8 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) struct mb_cache *cache = ce->e_cache; mb_assert(!(ce->e_used || ce->e_queued)); - if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) { - /* free failed -- put back on the lru list - for freeing later. */ - spin_lock(&mb_cache_spinlock); - list_add(&ce->e_lru_list, &mb_cache_lru_list); - spin_unlock(&mb_cache_spinlock); - } else { - kmem_cache_free(cache->c_entry_cache, ce); - atomic_dec(&cache->c_entry_count); - } + kmem_cache_free(cache->c_entry_cache, ce); + atomic_dec(&cache->c_entry_count); } @@ -243,72 +218,49 @@ out: * memory was available. * * @name: name of the cache (informal) - * @cache_op: contains the callback called when freeing a cache entry - * @entry_size: The size of a cache entry, including - * struct mb_cache_entry - * @indexes_count: number of additional indexes in the cache. Must equal - * MB_CACHE_INDEXES_COUNT if the number of indexes is - * hardwired. 
* @bucket_bits: log2(number of hash buckets) */ struct mb_cache * -mb_cache_create(const char *name, struct mb_cache_op *cache_op, - size_t entry_size, int indexes_count, int bucket_bits) +mb_cache_create(const char *name, int bucket_bits) { - int m=0, n, bucket_count = 1 << bucket_bits; + int n, bucket_count = 1 << bucket_bits; struct mb_cache *cache = NULL; - if(entry_size < sizeof(struct mb_cache_entry) + - indexes_count * sizeof(((struct mb_cache_entry *) 0)->e_indexes[0])) - return NULL; - - cache = kmalloc(sizeof(struct mb_cache) + - indexes_count * sizeof(struct list_head), GFP_KERNEL); + cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); if (!cache) - goto fail; + return NULL; cache->c_name = name; - cache->c_op.free = NULL; - if (cache_op) - cache->c_op.free = cache_op->free; atomic_set(&cache->c_entry_count, 0); cache->c_bucket_bits = bucket_bits; -#ifdef MB_CACHE_INDEXES_COUNT - mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT); -#else - cache->c_indexes_count = indexes_count; -#endif cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), GFP_KERNEL); if (!cache->c_block_hash) goto fail; for (n=0; n<bucket_count; n++) INIT_LIST_HEAD(&cache->c_block_hash[n]); - for (m=0; m<indexes_count; m++) { - cache->c_indexes_hash[m] = kmalloc(bucket_count * - sizeof(struct list_head), - GFP_KERNEL); - if (!cache->c_indexes_hash[m]) - goto fail; - for (n=0; n<bucket_count; n++) - INIT_LIST_HEAD(&cache->c_indexes_hash[m][n]); - } - cache->c_entry_cache = kmem_cache_create(name, entry_size, 0, + cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head), + GFP_KERNEL); + if (!cache->c_index_hash) + goto fail; + for (n=0; n<bucket_count; n++) + INIT_LIST_HEAD(&cache->c_index_hash[n]); + cache->c_entry_cache = kmem_cache_create(name, + sizeof(struct mb_cache_entry), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); if (!cache->c_entry_cache) - goto fail; + goto fail2; spin_lock(&mb_cache_spinlock); list_add(&cache->c_cache_list, &mb_cache_list); spin_unlock(&mb_cache_spinlock); return cache; +fail2: + kfree(cache->c_index_hash); + fail: - if (cache) { - while (--m >= 0) - kfree(cache->c_indexes_hash[m]); 
- kfree(cache->c_block_hash); - kfree(cache); - } + kfree(cache->c_block_hash); + kfree(cache); return NULL; } @@ -357,7 +309,6 @@ mb_cache_destroy(struct mb_cache *cache) { LIST_HEAD(free_list); struct list_head *l, *ltmp; - int n; spin_lock(&mb_cache_spinlock); list_for_each_safe(l, ltmp, &mb_cache_lru_list) { @@ -384,8 +335,7 @@ mb_cache_destroy(struct mb_cache *cache) kmem_cache_destroy(cache->c_entry_cache); - for (n=0; n < mb_cache_indexes(cache); n++) - kfree(cache->c_indexes_hash[n]); + kfree(cache->c_index_hash); kfree(cache->c_block_hash); kfree(cache); } @@ -429,17 +379,16 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) * * @bdev: device the cache entry belongs to * @block: block number - * @keys: array of additional keys. There must be indexes_count entries - * in the array (as specified when creating the cache). + * @key: lookup key */ int mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, - sector_t block, unsigned int keys[]) + sector_t block, unsigned int key) { struct mb_cache *cache = ce->e_cache; unsigned int bucket; struct list_head *l; - int error = -EBUSY, n; + int error = -EBUSY; bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), cache->c_bucket_bits); @@ -454,12 +403,9 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, ce->e_bdev = bdev; ce->e_block = block; list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); - for (n=0; n<mb_cache_indexes(cache); n++) { - ce->e_indexes[n].o_key = keys[n]; - bucket = hash_long(keys[n], cache->c_bucket_bits); - list_add(&ce->e_indexes[n].o_list, - &cache->c_indexes_hash[n][bucket]); - } + ce->e_index.o_key = key; + bucket = hash_long(key, cache->c_bucket_bits); + list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]); error = 0; out: spin_unlock(&mb_cache_spinlock); @@ -555,13 +501,12 @@ cleanup: static struct mb_cache_entry * __mb_cache_entry_find(struct list_head *l, struct list_head *head, - int index, struct block_device *bdev, unsigned int key) + 
struct block_device *bdev, unsigned int key) { while (l != head) { struct mb_cache_entry *ce = - list_entry(l, struct mb_cache_entry, - e_indexes[index].o_list); - if (ce->e_bdev == bdev && ce->e_indexes[index].o_key == key) { + list_entry(l, struct mb_cache_entry, e_index.o_list); + if (ce->e_bdev == bdev && ce->e_index.o_key == key) { DEFINE_WAIT(wait); if (!list_empty(&ce->e_lru_list)) @@ -603,23 +548,20 @@ __mb_cache_entry_find(struct list_head *l, struct list_head *head, * returned cache entry is locked for shared access ("multiple readers"). * * @cache: the cache to search - * @index: the number of the additonal index to search (0<=indexc_bucket_bits); struct list_head *l; struct mb_cache_entry *ce; - mb_assert(index < mb_cache_indexes(cache)); spin_lock(&mb_cache_spinlock); - l = cache->c_indexes_hash[index][bucket].next; - ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], - index, bdev, key); + l = cache->c_index_hash[bucket].next; + ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); spin_unlock(&mb_cache_spinlock); return ce; } @@ -640,12 +582,11 @@ mb_cache_entry_find_first(struct mb_cache *cache, int index, * } * * @prev: The previous match - * @index: the number of the additonal index to search (0<=indexe_cache; @@ -653,11 +594,9 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, int index, struct list_head *l; struct mb_cache_entry *ce; - mb_assert(index < mb_cache_indexes(cache)); spin_lock(&mb_cache_spinlock); - l = prev->e_indexes[index].o_list.next; - ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], - index, bdev, key); + l = prev->e_index.o_list.next; + ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); __mb_cache_entry_release_unlock(prev); return ce; } diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h index a09b84e4fdb..54cbbac1e71 100644 --- a/include/linux/mbcache.h +++ b/include/linux/mbcache.h @@ -4,9 +4,6 @@ (C) 2001 by Andreas Gruenbacher, 
*/ -/* Hardwire the number of additional indexes */ -#define MB_CACHE_INDEXES_COUNT 1 - struct mb_cache_entry { struct list_head e_lru_list; struct mb_cache *e_cache; @@ -18,17 +15,12 @@ struct mb_cache_entry { struct { struct list_head o_list; unsigned int o_key; - } e_indexes[0]; -}; - -struct mb_cache_op { - int (*free)(struct mb_cache_entry *, gfp_t); + } e_index; }; /* Functions on caches */ -struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t, - int, int); +struct mb_cache *mb_cache_create(const char *, int); void mb_cache_shrink(struct block_device *); void mb_cache_destroy(struct mb_cache *); @@ -36,17 +28,15 @@ void mb_cache_destroy(struct mb_cache *); struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *, gfp_t); int mb_cache_entry_insert(struct mb_cache_entry *, struct block_device *, - sector_t, unsigned int[]); + sector_t, unsigned int); void mb_cache_entry_release(struct mb_cache_entry *); void mb_cache_entry_free(struct mb_cache_entry *); struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, struct block_device *, sector_t); -#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int, +struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *, unsigned int); -struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int, +struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, struct block_device *, unsigned int); -#endif -- cgit v1.2.3-70-g09d2 From e566d48c9bd56f57e25e855a21e06ca2c2525795 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 21 Jul 2010 19:44:45 +0200 Subject: mbcache: fix shrinker function return value The shrinker function is supposed to return the number of cache entries after shrinking, not before shrinking. Fix that. Based on a patch from Wang Sheng-Hui . 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- fs/mbcache.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/fs/mbcache.c b/fs/mbcache.c index 8a2cbd82307..cf4e6cdfd15 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -176,22 +176,12 @@ static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { LIST_HEAD(free_list); - struct list_head *l, *ltmp; + struct mb_cache *cache; + struct mb_cache_entry *entry, *tmp; int count = 0; - spin_lock(&mb_cache_spinlock); - list_for_each(l, &mb_cache_list) { - struct mb_cache *cache = - list_entry(l, struct mb_cache, c_cache_list); - mb_debug("cache %s (%d)", cache->c_name, - atomic_read(&cache->c_entry_count)); - count += atomic_read(&cache->c_entry_count); - } mb_debug("trying to free %d entries", nr_to_scan); - if (nr_to_scan == 0) { - spin_unlock(&mb_cache_spinlock); - goto out; - } + spin_lock(&mb_cache_spinlock); while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { struct mb_cache_entry *ce = list_entry(mb_cache_lru_list.next, @@ -199,12 +189,15 @@ mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) list_move_tail(&ce->e_lru_list, &free_list); __mb_cache_entry_unhash(ce); } + list_for_each_entry(cache, &mb_cache_list, c_cache_list) { + mb_debug("cache %s (%d)", cache->c_name, + atomic_read(&cache->c_entry_count)); + count += atomic_read(&cache->c_entry_count); + } spin_unlock(&mb_cache_spinlock); - list_for_each_safe(l, ltmp, &free_list) { - __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, - e_lru_list), gfp_mask); + list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { + __mb_cache_entry_forget(entry, gfp_mask); } -out: return (count / 100) * sysctl_vfs_cache_pressure; } -- cgit v1.2.3-70-g09d2 From 1b9474635e21eef0f3e69fd1c7b1b9598ffdddd3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 18 Jul 2010 17:51:21 -0400 Subject: cifs: truncate fallout Remove the calls to 
inode_newsize_ok given that we already did it as part of inode_change_ok in the beginning of cifs_setattr_(no)unix. No need to call ->truncate if cifs doesn't have one, so remove the explicit call in cifs_vmtruncate, and replace the calls to vmtruncate with truncate_setsize which is vmtruncate minus inode_newsize_ok and the call to ->truncate. Rename cifs_vmtruncate to cifs_setsize to match the new calling conventions. Question 1: why does cifs do the pagecache munging and i_size update twice for each setattr call, once opencoded in cifs_vmtruncate, and once using the VFS helpers? Question 2: what is supposed to be protected by i_lock in cifs_vmtruncate? Do we need it around the call to inode_change_ok? [AV: fixed build breakage] Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/cifs/inode.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b95f4a5af01..ddbe8a84c51 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1679,26 +1679,16 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from) return rc; } -static int cifs_vmtruncate(struct inode *inode, loff_t offset) +static void cifs_setsize(struct inode *inode, loff_t offset) { loff_t oldsize; - int err; spin_lock(&inode->i_lock); - err = inode_newsize_ok(inode, offset); - if (err) { - spin_unlock(&inode->i_lock); - goto out; - } - oldsize = inode->i_size; i_size_write(inode, offset); spin_unlock(&inode->i_lock); + truncate_pagecache(inode, oldsize, offset); - if (inode->i_op->truncate) - inode->i_op->truncate(inode); -out: - return err; } static int @@ -1771,7 +1761,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, if (rc == 0) { cifsInode->server_eof = attrs->ia_size; - rc = cifs_vmtruncate(inode, attrs->ia_size); + cifs_setsize(inode, attrs->ia_size); cifs_truncate_page(inode->i_mapping, inode->i_size); } @@ -1891,11 +1881,8 @@ cifs_setattr_unix(struct dentry *direntry, 
struct iattr *attrs) goto out; if ((attrs->ia_valid & ATTR_SIZE) && - attrs->ia_size != i_size_read(inode)) { - rc = vmtruncate(inode, attrs->ia_size); - if (rc) - goto out; - } + attrs->ia_size != i_size_read(inode)) + truncate_setsize(inode, attrs->ia_size); setattr_copy(inode, attrs); mark_inode_dirty(inode); @@ -2050,11 +2037,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) goto cifs_setattr_exit; if ((attrs->ia_valid & ATTR_SIZE) && - attrs->ia_size != i_size_read(inode)) { - rc = vmtruncate(inode, attrs->ia_size); - if (rc) - goto cifs_setattr_exit; - } + attrs->ia_size != i_size_read(inode)) + truncate_setsize(inode, attrs->ia_size); setattr_copy(inode, attrs); mark_inode_dirty(inode); -- cgit v1.2.3-70-g09d2 From 669d5f1f608f7de29bb467bb239517a414e43777 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 5 Jul 2010 15:14:59 +0300 Subject: AFFS: clean up dirty flag usage In 'affs_write_super()': remove ancient and wrong commented code, remove unneeded 'clean' variable, so the function becomes a bit cleaner and simpler. In 'affs_remount(): remove unnecessary SB dirty flag changes. 
Tested-by: Artem Bityutskiy Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/affs/super.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/fs/affs/super.c b/fs/affs/super.c index 2c804a87c14..fde3b9ae700 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -61,20 +61,13 @@ affs_put_super(struct super_block *sb) static void affs_write_super(struct super_block *sb) { - int clean = 2; - lock_super(sb); - if (!(sb->s_flags & MS_RDONLY)) { - // if (sbi->s_bitmap[i].bm_bh) { - // if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) { - // clean = 0; - affs_commit_super(sb, clean); - sb->s_dirt = !clean; /* redo until bitmap synced */ - } else - sb->s_dirt = 0; + if (!(sb->s_flags & MS_RDONLY)) + affs_commit_super(sb, 2); + sb->s_dirt = 0; unlock_super(sb); - pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean); + pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds()); } static int @@ -553,9 +546,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) return 0; } if (*flags & MS_RDONLY) { - sb->s_dirt = 1; - while (sb->s_dirt) - affs_write_super(sb); + affs_write_super(sb); affs_free_bitmap(sb); } else res = affs_init_bitmap(sb, flags); -- cgit v1.2.3-70-g09d2 From 7435d50611b04c1155a939a9f373154a53606592 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 5 Jul 2010 15:15:00 +0300 Subject: AFFS: wait for sb synchronization when needed AFFS does not ever wait for superblock synchronization in ->put_super(), ->write_super, and ->sync_fs(). However, it should wait for synchronization in ->put_super() because it is about to be unmounted, in ->write_super() because this is periodic SB synchronization performed from a separate kernel thread, and in ->sync_fs() it should respect the 'wait' flag. This patch fixes the situation. Also, in ->put_super(), do not write the SB if it is not dirty. 
Tested-by: Artem Bityutskiy Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/affs/super.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/affs/super.c b/fs/affs/super.c index fde3b9ae700..33c4e7eef47 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -26,7 +26,7 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf); static int affs_remount (struct super_block *sb, int *flags, char *data); static void -affs_commit_super(struct super_block *sb, int clean) +affs_commit_super(struct super_block *sb, int wait, int clean) { struct affs_sb_info *sbi = AFFS_SB(sb); struct buffer_head *bh = sbi->s_root_bh; @@ -36,6 +36,8 @@ affs_commit_super(struct super_block *sb, int clean) secs_to_datestamp(get_seconds(), &tail->disk_change); affs_fix_checksum(sb, bh); mark_buffer_dirty(bh); + if (wait) + sync_dirty_buffer(bh); } static void @@ -46,8 +48,8 @@ affs_put_super(struct super_block *sb) lock_kernel(); - if (!(sb->s_flags & MS_RDONLY)) - affs_commit_super(sb, 1); + if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt) + affs_commit_super(sb, 1, 1); kfree(sbi->s_prefix); affs_free_bitmap(sb); @@ -63,7 +65,7 @@ affs_write_super(struct super_block *sb) { lock_super(sb); if (!(sb->s_flags & MS_RDONLY)) - affs_commit_super(sb, 2); + affs_commit_super(sb, 1, 2); sb->s_dirt = 0; unlock_super(sb); @@ -74,7 +76,7 @@ static int affs_sync_fs(struct super_block *sb, int wait) { lock_super(sb); - affs_commit_super(sb, 2); + affs_commit_super(sb, wait, 2); sb->s_dirt = 0; unlock_super(sb); return 0; -- cgit v1.2.3-70-g09d2 From 4e29d50a28c267bd1d1731a9fb8f773663d93e23 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 5 Jul 2010 15:15:01 +0300 Subject: BFS: clean up the superblock usage BFS is a very simple FS and its superblock contains only static information and is never changed. However, the BFS code for some mysterious reason marked its buffer head as dirty from time to time, but nothing in that buffer was ever changed.
This patch removes all the BFS superblock manipulation, simply because it is not needed. It removes: 1. The si_sbh field from 'struct bfs_sb_info' because it is not needed. We only need to read the SB once on mount to get the start of data blocks and the FS size. After this, we can forget about the SB. 2. All instances of 'mark_buffer_dirty(sbh)' for BFS SB because it is never changed. 3. The '->sync_fs()' method because there is nothing to sync (inodes are synched by VFS). 4. The '->write_super()' method, again, because the SB is never changed. Tested-by: Artem Bityutskiy Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/bfs/bfs.h | 1 - fs/bfs/file.c | 3 --- fs/bfs/inode.c | 46 +++++++--------------------------------------- 3 files changed, 7 insertions(+), 43 deletions(-) diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 7109e451abf..f7f87e233dd 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -17,7 +17,6 @@ struct bfs_sb_info { unsigned long si_lf_eblk; unsigned long si_lasti; unsigned long *si_imap; - struct buffer_head *si_sbh; /* buffer header w/superblock */ struct mutex bfs_lock; }; diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 8fc2e9c9739..eb67edd0f8e 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -70,7 +70,6 @@ static int bfs_get_block(struct inode *inode, sector_t block, struct super_block *sb = inode->i_sb; struct bfs_sb_info *info = BFS_SB(sb); struct bfs_inode_info *bi = BFS_I(inode); - struct buffer_head *sbh = info->si_sbh; phys = bi->i_sblock + block; if (!create) { @@ -112,7 +111,6 @@ static int bfs_get_block(struct inode *inode, sector_t block, info->si_freeb -= phys - bi->i_eblock; info->si_lf_eblk = bi->i_eblock = phys; mark_inode_dirty(inode); - mark_buffer_dirty(sbh); err = 0; goto out; } @@ -147,7 +145,6 @@ static int bfs_get_block(struct inode *inode, sector_t block, */ info->si_freeb -= bi->i_eblock - bi->i_sblock + 1 - inode->i_blocks; mark_inode_dirty(inode); - mark_buffer_dirty(sbh); map_bh(bh_result, sb, phys); out:
mutex_unlock(&info->bfs_lock); diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 0499822b156..c4daf0f5fc0 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -31,7 +31,6 @@ MODULE_LICENSE("GPL"); #define dprintf(x...) #endif -static void bfs_write_super(struct super_block *s); void dump_imap(const char *prefix, struct super_block *s); struct inode *bfs_iget(struct super_block *sb, unsigned long ino) @@ -204,33 +203,11 @@ static void bfs_evict_inode(struct inode *inode) * "last block of the last file" even if there is no * real file there, saves us 1 gap. */ - if (info->si_lf_eblk == bi->i_eblock) { + if (info->si_lf_eblk == bi->i_eblock) info->si_lf_eblk = bi->i_sblock - 1; - mark_buffer_dirty(info->si_sbh); - } mutex_unlock(&info->bfs_lock); } -static int bfs_sync_fs(struct super_block *sb, int wait) -{ - struct bfs_sb_info *info = BFS_SB(sb); - - mutex_lock(&info->bfs_lock); - mark_buffer_dirty(info->si_sbh); - sb->s_dirt = 0; - mutex_unlock(&info->bfs_lock); - - return 0; -} - -static void bfs_write_super(struct super_block *sb) -{ - if (!(sb->s_flags & MS_RDONLY)) - bfs_sync_fs(sb, 1); - else - sb->s_dirt = 0; -} - static void bfs_put_super(struct super_block *s) { struct bfs_sb_info *info = BFS_SB(s); @@ -240,10 +217,6 @@ static void bfs_put_super(struct super_block *s) lock_kernel(); - if (s->s_dirt) - bfs_write_super(s); - - brelse(info->si_sbh); mutex_destroy(&info->bfs_lock); kfree(info->si_imap); kfree(info); @@ -315,8 +288,6 @@ static const struct super_operations bfs_sops = { .write_inode = bfs_write_inode, .evict_inode = bfs_evict_inode, .put_super = bfs_put_super, - .write_super = bfs_write_super, - .sync_fs = bfs_sync_fs, .statfs = bfs_statfs, }; @@ -343,7 +314,7 @@ void dump_imap(const char *prefix, struct super_block *s) static int bfs_fill_super(struct super_block *s, void *data, int silent) { - struct buffer_head *bh; + struct buffer_head *bh, *sbh; struct bfs_super_block *bfs_sb; struct inode *inode; unsigned i, imap_len; @@ -359,10 +330,10 @@ 
static int bfs_fill_super(struct super_block *s, void *data, int silent) sb_set_blocksize(s, BFS_BSIZE); - info->si_sbh = sb_bread(s, 0); - if (!info->si_sbh) + sbh = sb_bread(s, 0); + if (!sbh) goto out; - bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data; + bfs_sb = (struct bfs_super_block *)sbh->b_data; if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { if (!silent) printf("No BFS filesystem on %s (magic=%08x)\n", @@ -466,10 +437,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) info->si_lf_eblk = eblock; } brelse(bh); - if (!(s->s_flags & MS_RDONLY)) { - mark_buffer_dirty(info->si_sbh); - s->s_dirt = 1; - } + brelse(sbh); dump_imap("read_super", s); return 0; @@ -479,7 +447,7 @@ out3: out2: kfree(info->si_imap); out1: - brelse(info->si_sbh); + brelse(sbh); out: mutex_destroy(&info->bfs_lock); kfree(info); -- cgit v1.2.3-70-g09d2 From 696ac96c2757963cd6751c26215e3c6d328705aa Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 5 Jul 2010 15:15:02 +0300 Subject: btrfs: remove junk sb_dirt change BTRFS does not define a '->write_super()' method, so it should not mark its superblock as dirty. This looks like some left-over. Signed-off-by: Artem Bityutskiy Acked-by: Chris Mason Signed-off-by: Al Viro --- fs/btrfs/inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2c54f04a0bf..8976c3343a9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2938,7 +2938,6 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, dir->i_mtime = dir->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, dir); BUG_ON(ret); - dir->i_sb->s_dirt = 1; btrfs_free_path(path); return 0; -- cgit v1.2.3-70-g09d2 From 315671f3b8091bc8e15035ffeba5f7bff7b8edec Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 5 Jul 2010 15:15:03 +0300 Subject: sysv: do not mark superblock dirty on mount I did not find any docs about this file-system, and I have no possibility to test my changes. 
Thus, this is untested. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/sysv/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 5a903da5455..0e44a625335 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -347,7 +347,6 @@ static int complete_read_super(struct super_block *sb, int silent, int size) sb->s_flags |= MS_RDONLY; if (sbi->s_truncate) sb->s_root->d_op = &sysv_dentry_operations; - sb->s_dirt = 1; return 1; } -- cgit v1.2.3-70-g09d2 From 719f2c879f4dda7d7f303bd387d37cd96db29d31 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 5 Jul 2010 15:15:04 +0300 Subject: sysv: do not mark superblock dirty on remount No need to mark the superblock as dirty in sysv_remount, synchronize it instead (only if mounting R/O). I did not find any docs about this file-system, and I have no possibility to test my changes. Thus, this is untested. I see other issues in sysv, e.g., why sysv_sync_fs writes only in the FSTYPE_SYSV4 case? However, it marks its SB bh's dirty for all types, and does not wait for them ever. With zero docs I'm unable to fix this. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/sysv/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 613a5056e88..de44d067b9e 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -71,8 +71,8 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data) lock_super(sb); if (sbi->s_forced_ro) *flags |= MS_RDONLY; - if (!(*flags & MS_RDONLY)) - sb->s_dirt = 1; + if (*flags & MS_RDONLY) + sysv_write_super(sb); unlock_super(sb); return 0; } -- cgit v1.2.3-70-g09d2 From 4f331f01b9c43bf001d3ffee578a97a1e0633eac Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 Jul 2010 15:18:07 -0700 Subject: vfs: don't hold s_umount over close_bdev_exclusive() call Fix an obscure AB-BA deadlock in get_sb_bdev(). 
When a superblock is mounted more than once get_sb_bdev() calls close_bdev_exclusive() to drop the extra bdev reference while holding s_umount. However, sb->s_umount nests inside bd_mutex during __invalidate_device() and close_bdev_exclusive() acquires bd_mutex during blkdev_put(); thus creating an AB-BA deadlock. This condition doesn't trigger frequently. For this condition to be visible to lockdep, the filesystem must occupy the whole device (as __invalidate_device() only grabs bd_mutex for the whole device), the FS must be mounted more than once and partition rescan should be issued while the FS is still mounted. Fix it by dropping s_umount over close_bdev_exclusive(). Signed-off-by: Tejun Heo Reported-by: Ciprian Docan Cc: Al Viro Acked-by: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/super.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/super.c b/fs/super.c index 938119ab8dc..3479ca6f005 100644 --- a/fs/super.c +++ b/fs/super.c @@ -773,7 +773,16 @@ int get_sb_bdev(struct file_system_type *fs_type, goto error_bdev; } + /* + * s_umount nests inside bd_mutex during + * __invalidate_device(). close_bdev_exclusive() + * acquires bd_mutex and can't be called under + * s_umount. Drop s_umount temporarily. This is safe + * as we're holding an active reference. + */ + up_write(&s->s_umount); close_bdev_exclusive(bdev, mode); + down_write(&s->s_umount); } else { char b[BDEVNAME_SIZE]; -- cgit v1.2.3-70-g09d2 From 7a4dec53897ecd3367efb1e12fe8a4edc47dc0e9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Aug 2010 12:05:43 -0400 Subject: Fix sget() race with failing mount If sget() finds a matching superblock being set up, it'll grab an active reference to it and grab s_umount. That's fine - we'll wait for completion of foofs_get_sb() that way. However, if said foofs_get_sb() fails we'll end up holding the halfway-created superblock. 
deactivate_locked_super() called by foofs_get_sb() will just unlock the sucker since we are holding another active reference to it. What we need is a way to tell if superblock has been successfully set up. Unfortunately, neither ->s_root nor the check for MS_ACTIVE quite fit. Cheap and easy way, suitable for backport: new flag set by the (only) caller of ->get_sb(). If that flag isn't present by the time sget() grabbed s_umount on preexisting superblock it has found, it's seeing a stillborn and should just bury it with deactivate_locked_super() (and repeat the search). Longer term we want to set that flag in ->get_sb() instances (and check for it to distinguish between "sget() found us a live sb" and "sget() has allocated an sb, we need to set it up" in there, instead of checking ->s_root as we do now). Signed-off-by: Al Viro Cc: stable@kernel.org --- fs/namespace.c | 2 +- fs/super.c | 6 ++++++ include/linux/fs.h | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 88058de59c7..32dcd24bbc9 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1984,7 +1984,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, if (flags & MS_RDONLY) mnt_flags |= MNT_READONLY; - flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | + flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | MS_STRICTATIME); diff --git a/fs/super.c b/fs/super.c index 3479ca6f005..bd9eea4bb2b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -305,8 +305,13 @@ retry: if (s) { up_write(&s->s_umount); destroy_super(s); + s = NULL; } down_write(&old->s_umount); + if (unlikely(!(old->s_flags & MS_BORN))) { + deactivate_locked_super(old); + goto retry; + } return old; } } @@ -918,6 +923,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void goto out_free_secdata; BUG_ON(!mnt->mnt_sb); WARN_ON(!mnt->mnt_sb->s_bdi); + mnt->mnt_sb->s_flags |= MS_BORN; 
error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); if (error) diff --git a/include/linux/fs.h b/include/linux/fs.h index 9bedf4219f8..58e4b035e28 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -209,6 +209,7 @@ struct inodes_stat_t { #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ #define MS_I_VERSION (1<<23) /* Update inode I_version field */ #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_BORN (1<<29) #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) -- cgit v1.2.3-70-g09d2 From dca332528bc69e05f67161e1ed59929633d5e63d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2010 02:31:46 +0400 Subject: no need for list_for_each_entry_safe()/resetting with superblock list just delay __put_super() a bit Signed-off-by: Al Viro --- fs/dcache.c | 12 +++++++----- fs/super.c | 36 +++++++++++++++++++++--------------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index caf08574982..9f2c1341796 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -536,7 +536,7 @@ restart: */ static void prune_dcache(int count) { - struct super_block *sb, *n; + struct super_block *sb, *p = NULL; int w_count; int unused = dentry_stat.nr_unused; int prune_ratio; @@ -550,7 +550,7 @@ static void prune_dcache(int count) else prune_ratio = unused / count; spin_lock(&sb_lock); - list_for_each_entry_safe(sb, n, &super_blocks, s_list) { + list_for_each_entry(sb, &super_blocks, s_list) { if (list_empty(&sb->s_instances)) continue; if (sb->s_nr_dentry_unused == 0) @@ -590,14 +590,16 @@ static void prune_dcache(int count) up_read(&sb->s_umount); } spin_lock(&sb_lock); - /* lock was dropped, must reset next */ - list_safe_reset_next(sb, n, s_list); + if (p) + __put_super(p); count -= pruned; - __put_super(sb); + p = sb; /* more work left to do? 
*/ if (count <= 0) break; } + if (p) + __put_super(p); spin_unlock(&sb_lock); spin_unlock(&dcache_lock); } diff --git a/fs/super.c b/fs/super.c index bd9eea4bb2b..9674ab2c871 100644 --- a/fs/super.c +++ b/fs/super.c @@ -363,10 +363,10 @@ EXPORT_SYMBOL(drop_super); */ void sync_supers(void) { - struct super_block *sb, *n; + struct super_block *sb, *p = NULL; spin_lock(&sb_lock); - list_for_each_entry_safe(sb, n, &super_blocks, s_list) { + list_for_each_entry(sb, &super_blocks, s_list) { if (list_empty(&sb->s_instances)) continue; if (sb->s_op->write_super && sb->s_dirt) { @@ -379,11 +379,13 @@ void sync_supers(void) up_read(&sb->s_umount); spin_lock(&sb_lock); - /* lock was dropped, must reset next */ - list_safe_reset_next(sb, n, s_list); - __put_super(sb); + if (p) + __put_super(p); + p = sb; } } + if (p) + __put_super(p); spin_unlock(&sb_lock); } @@ -397,10 +399,10 @@ void sync_supers(void) */ void iterate_supers(void (*f)(struct super_block *, void *), void *arg) { - struct super_block *sb, *n; + struct super_block *sb, *p = NULL; spin_lock(&sb_lock); - list_for_each_entry_safe(sb, n, &super_blocks, s_list) { + list_for_each_entry(sb, &super_blocks, s_list) { if (list_empty(&sb->s_instances)) continue; sb->s_count++; @@ -412,10 +414,12 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg) up_read(&sb->s_umount); spin_lock(&sb_lock); - /* lock was dropped, must reset next */ - list_safe_reset_next(sb, n, s_list); - __put_super(sb); + if (p) + __put_super(p); + p = sb; } + if (p) + __put_super(p); spin_unlock(&sb_lock); } @@ -577,10 +581,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) static void do_emergency_remount(struct work_struct *work) { - struct super_block *sb, *n; + struct super_block *sb, *p = NULL; spin_lock(&sb_lock); - list_for_each_entry_safe(sb, n, &super_blocks, s_list) { + list_for_each_entry(sb, &super_blocks, s_list) { if (list_empty(&sb->s_instances)) continue; sb->s_count++; @@ -594,10 
+598,12 @@ static void do_emergency_remount(struct work_struct *work) } up_write(&sb->s_umount); spin_lock(&sb_lock); - /* lock was dropped, must reset next */ - list_safe_reset_next(sb, n, s_list); - __put_super(sb); + if (p) + __put_super(p); + p = sb; } + if (p) + __put_super(p); spin_unlock(&sb_lock); kfree(work); printk("Emergency Remount complete\n"); -- cgit v1.2.3-70-g09d2