diff options
Diffstat (limited to 'fs')
62 files changed, 667 insertions, 417 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 24084732b1d..80ef38c73e5 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -41,19 +41,8 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct dentry *root; - root = mount_pseudo(fs_type, "anon_inode:", NULL, + return mount_pseudo(fs_type, "anon_inode:", NULL, &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC); - if (!IS_ERR(root)) { - struct super_block *s = root->d_sb; - anon_inode_inode = alloc_anon_inode(s); - if (IS_ERR(anon_inode_inode)) { - dput(root); - deactivate_locked_super(s); - root = ERR_CAST(anon_inode_inode); - } - } - return root; } static struct file_system_type anon_inode_fs_type = { @@ -175,22 +164,15 @@ EXPORT_SYMBOL_GPL(anon_inode_getfd); static int __init anon_inode_init(void) { - int error; - - error = register_filesystem(&anon_inode_fs_type); - if (error) - goto err_exit; anon_inode_mnt = kern_mount(&anon_inode_fs_type); - if (IS_ERR(anon_inode_mnt)) { - error = PTR_ERR(anon_inode_mnt); - goto err_unregister_filesystem; - } - return 0; + if (IS_ERR(anon_inode_mnt)) + panic("anon_inode_init() kernel mount failed (%ld)\n", PTR_ERR(anon_inode_mnt)); -err_unregister_filesystem: - unregister_filesystem(&anon_inode_fs_type); -err_exit: - panic(KERN_ERR "anon_inode_init() failed (%d)\n", error); + anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); + if (IS_ERR(anon_inode_inode)) + panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode)); + + return 0; } fs_initcall(anon_inode_init); diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 0129b78a690..4f70f383132 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -458,11 +458,10 @@ static int bio_integrity_verify(struct bio *bio) struct blk_integrity_exchg bix; struct bio_vec *bv; sector_t sector = bio->bi_integrity->bip_iter.bi_sector; - unsigned int sectors, total, ret; + unsigned int sectors, ret = 0; void *prot_buf = bio->bi_integrity->bip_buf; int i; - ret = total = 0; bix.disk_name = bio->bi_bdev->bd_disk->disk_name; bix.sector_size = bi->sector_size; @@ -484,8 +483,6 @@ static int bio_integrity_verify(struct bio *bio) sectors = bv->bv_len / bi->sector_size; sector += sectors; prot_buf += sectors * bi->tuple_size; - total += sectors * bi->tuple_size; - BUG_ON(total > bio->bi_integrity->bip_iter.bi_size); kunmap_atomic(kaddr); } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index cf32f039336..c0f3718b77a 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -513,7 +513,7 @@ struct cifs_mnt_data { static inline unsigned int get_rfc1002_length(void *buf) { - return be32_to_cpu(*((__be32 *)buf)); + return be32_to_cpu(*((__be32 *)buf)) & 0xffffff; } static inline void diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 53c15074bb3..834fce759d8 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2579,31 +2579,19 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; ssize_t rc = -EACCES; + loff_t lock_pos = pos; - BUG_ON(iocb->ki_pos != pos); - + if (file->f_flags & O_APPEND) + lock_pos = i_size_read(inode); /* * We need to hold the sem to be sure nobody modifies lock list * with a brlock that prevents writing. */ down_read(&cinode->lock_sem); - if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), + if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs), server->vals->exclusive_lock_type, NULL, - CIFS_WRITE_OP)) { - mutex_lock(&inode->i_mutex); - rc = __generic_file_aio_write(iocb, iov, nr_segs, - &iocb->ki_pos); - mutex_unlock(&inode->i_mutex); - } - - if (rc > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - rc, rc); - if (err < 0) - rc = err; - } - + CIFS_WRITE_OP)) + rc = generic_file_aio_write(iocb, iov, nr_segs, pos); up_read(&cinode->lock_sem); return rc; } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b3757095284..18cd5650a5f 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -270,6 +270,26 @@ cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, iov->iov_len = rqst->rq_pagesz; } +static unsigned long +rqst_len(struct smb_rqst *rqst) +{ + unsigned int i; + struct kvec *iov = rqst->rq_iov; + unsigned long buflen = 0; + + /* total up iov array first */ + for (i = 0; i < rqst->rq_nvec; i++) + buflen += iov[i].iov_len; + + /* add in the page array if there is one */ + if (rqst->rq_npages) { + buflen += rqst->rq_pagesz * (rqst->rq_npages - 1); + buflen += rqst->rq_tailsz; + } + + return buflen; +} + static int smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) { @@ -277,6 +297,7 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); + unsigned long send_length; unsigned int i; size_t total_len = 0, sent; struct socket *ssocket = server->ssocket; @@ -285,6 +306,14 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) if (ssocket == NULL) return -ENOTSOCK; + /* sanity check send length */ + send_length = rqst_len(rqst); + if (send_length != smb_buf_length + 4) { + WARN(1, "Send length mismatch(send_length=%lu smb_buf_length=%u)\n", + send_length, smb_buf_length); + return -EIO; + } + cifs_dbg(FYI, "Sending smb: smb_len=%u\n", smb_buf_length); dump_smb(iov[0].iov_base, iov[0].iov_len); diff --git a/fs/compat.c b/fs/compat.c index 6af20de2c1a..19252b97f0c 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -72,8 +72,8 @@ int compat_printk(const char *fmt, ...) * Not all architectures have sys_utime, so implement this in terms * of sys_utimes. */ -asmlinkage long compat_sys_utime(const char __user *filename, - struct compat_utimbuf __user *t) +COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, + struct compat_utimbuf __user *, t) { struct timespec tv[2]; @@ -87,7 +87,7 @@ asmlinkage long compat_sys_utime(const char __user *filename, return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); } -asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename, struct compat_timespec __user *t, int flags) +COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags) { struct timespec tv[2]; @@ -102,7 +102,7 @@ asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filena return do_utimes(dfd, filename, t ? tv : NULL, flags); } -asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename, struct compat_timeval __user *t) +COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t) { struct timespec tv[2]; @@ -121,7 +121,7 @@ asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filena return do_utimes(dfd, filename, t ? tv : NULL, 0); } -asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_timeval __user *t) +COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t) { return compat_sys_futimesat(AT_FDCWD, filename, t); } @@ -159,8 +159,8 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; } -asmlinkage long compat_sys_newstat(const char __user * filename, - struct compat_stat __user *statbuf) +COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, + struct compat_stat __user *, statbuf) { struct kstat stat; int error; @@ -171,8 +171,8 @@ asmlinkage long compat_sys_newstat(const char __user * filename, return cp_compat_stat(&stat, statbuf); } -asmlinkage long compat_sys_newlstat(const char __user * filename, - struct compat_stat __user *statbuf) +COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, + struct compat_stat __user *, statbuf) { struct kstat stat; int error; @@ -184,9 +184,9 @@ asmlinkage long compat_sys_newlstat(const char __user * filename, } #ifndef __ARCH_WANT_STAT64 -asmlinkage long compat_sys_newfstatat(unsigned int dfd, - const char __user *filename, - struct compat_stat __user *statbuf, int flag) +COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, + const char __user *, filename, + struct compat_stat __user *, statbuf, int, flag) { struct kstat stat; int error; @@ -198,8 +198,8 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, } #endif -asmlinkage long compat_sys_newfstat(unsigned int fd, - struct compat_stat __user * statbuf) +COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, + struct compat_stat __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); @@ -247,7 +247,7 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs * * The following statfs calls are copies of code from fs/statfs.c and * should be checked against those from time to time */ -asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) +COMPAT_SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct compat_statfs __user *, buf) { struct kstatfs tmp; int error = user_statfs(pathname, &tmp); @@ -256,7 +256,7 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta return error; } -asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) +COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, buf) { struct kstatfs tmp; int error = fd_statfs(fd, &tmp); @@ -298,7 +298,7 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat return 0; } -asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) +COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) { struct kstatfs tmp; int error; @@ -312,7 +312,7 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s return error; } -asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) +COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) { struct kstatfs tmp; int error; @@ -331,7 +331,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c * Given how simple this syscall is that apporach is more maintainable * than the various conversion hacks. */ -asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u) +COMPAT_SYSCALL_DEFINE2(ustat, unsigned, dev, struct compat_ustat __user *, u) { struct compat_ustat tmp; struct kstatfs sbuf; @@ -399,8 +399,8 @@ static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *u } #endif -asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, - unsigned long arg) +COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, + compat_ulong_t, arg) { mm_segment_t old_fs; struct flock f; @@ -468,16 +468,15 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, return ret; } -asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, - unsigned long arg) +COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, + compat_ulong_t, arg) { if ((cmd == F_GETLK64) || (cmd == F_SETLK64) || (cmd == F_SETLKW64)) return -EINVAL; return compat_sys_fcntl64(fd, cmd, arg); } -asmlinkage long -compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p) +COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p) { long ret; aio_context_t ctx64; @@ -496,32 +495,24 @@ compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p) return ret; } -asmlinkage long -compat_sys_io_getevents(aio_context_t ctx_id, - unsigned long min_nr, - unsigned long nr, - struct io_event __user *events, - struct compat_timespec __user *timeout) +COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, + compat_long_t, min_nr, + compat_long_t, nr, + struct io_event __user *, events, + struct compat_timespec __user *, timeout) { - long ret; struct timespec t; struct timespec __user *ut = NULL; - ret = -EFAULT; - if (unlikely(!access_ok(VERIFY_WRITE, events, - nr * sizeof(struct io_event)))) - goto out; if (timeout) { if (get_compat_timespec(&t, timeout)) - goto out; + return -EFAULT; ut = compat_alloc_user_space(sizeof(*ut)); if (copy_to_user(ut, &t, sizeof(t)) ) - goto out; + return -EFAULT; } - ret = sys_io_getevents(ctx_id, min_nr, nr, events, ut); -out: - return ret; + return sys_io_getevents(ctx_id, min_nr, nr, events, ut); } /* A write operation does a read from user space and vice versa */ @@ -617,8 +608,8 @@ copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) #define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) -asmlinkage long -compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb) +COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, + int, nr, u32 __user *, iocb) { struct iocb __user * __user *iocb64; long ret; @@ -770,10 +761,10 @@ static int do_nfs4_super_data_conv(void *raw_data) #define NCPFS_NAME "ncpfs" #define NFS4_NAME "nfs4" -asmlinkage long compat_sys_mount(const char __user * dev_name, - const char __user * dir_name, - const char __user * type, unsigned long flags, - const void __user * data) +COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, + const char __user *, dir_name, + const char __user *, type, compat_ulong_t, flags, + const void __user *, data) { char *kernel_type; unsigned long data_page; @@ -869,8 +860,8 @@ efault: return -EFAULT; } -asmlinkage long compat_sys_old_readdir(unsigned int fd, - struct compat_old_linux_dirent __user *dirent, unsigned int count) +COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, + struct compat_old_linux_dirent __user *, dirent, unsigned int, count) { int error; struct fd f = fdget(fd); @@ -948,8 +939,8 @@ efault: return -EFAULT; } -asmlinkage long compat_sys_getdents(unsigned int fd, - struct compat_linux_dirent __user *dirent, unsigned int count) +COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, + struct compat_linux_dirent __user *, dirent, unsigned int, count) { struct fd f; struct compat_linux_dirent __user * lastdirent; @@ -981,7 +972,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, return error; } -#ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64 +#ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 struct compat_getdents_callback64 { struct dir_context ctx; @@ -1033,8 +1024,8 @@ efault: return -EFAULT; } -asmlinkage long compat_sys_getdents64(unsigned int fd, - struct linux_dirent64 __user * dirent, unsigned int count) +COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd, + struct linux_dirent64 __user *, dirent, unsigned int, count) { struct fd f; struct linux_dirent64 __user * lastdirent; @@ -1066,7 +1057,7 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, fdput(f); return error; } -#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ +#endif /* __ARCH_WANT_COMPAT_SYS_GETDENTS64 */ /* * Exactly like fs/open.c:sys_open(), except that it doesn't set the @@ -1287,9 +1278,9 @@ out_nofds: return ret; } -asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, - struct compat_timeval __user *tvp) +COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, + compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, + struct compat_timeval __user *, tvp) { struct timespec end_time, *to = NULL; struct compat_timeval tv; @@ -1320,7 +1311,7 @@ struct compat_sel_arg_struct { compat_uptr_t tvp; }; -asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg) +COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) { struct compat_sel_arg_struct a; @@ -1381,9 +1372,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, return ret; } -asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, - struct compat_timespec __user *tsp, void __user *sig) +COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, + compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, + struct compat_timespec __user *, tsp, void __user *, sig) { compat_size_t sigsetsize = 0; compat_uptr_t up = 0; @@ -1400,9 +1391,9 @@ asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, sigsetsize); } -asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, - unsigned int nfds, struct compat_timespec __user *tsp, - const compat_sigset_t __user *sigmask, compat_size_t sigsetsize) +COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, + unsigned int, nfds, struct compat_timespec __user *, tsp, + const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { compat_sigset_t ss32; sigset_t ksigmask, sigsaved; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 3881610b643..e8228904727 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1538,9 +1538,10 @@ static int compat_ioctl_check_table(unsigned int xcmd) return ioctl_pointer[i] == xcmd; } -asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg) +COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, + compat_ulong_t, arg32) { + unsigned long arg = arg32; struct fd f = fdget(fd); int error = -EBADF; if (!f.file) diff --git a/fs/dcache.c b/fs/dcache.c index 265e0ce9769..ca02c13a84a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2833,9 +2833,9 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) u32 dlen = ACCESS_ONCE(name->len); char *p; - if (*buflen < dlen + 1) - return -ENAMETOOLONG; *buflen -= dlen + 1; + if (*buflen < 0) + return -ENAMETOOLONG; p = *buffer -= dlen + 1; *p++ = '/'; while (dlen--) { diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index 8dd524f3228..cdb2971192a 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -21,7 +21,7 @@ static ssize_t efivarfs_file_write(struct file *file, u32 attributes; struct inode *inode = file->f_mapping->host; unsigned long datasize = count - sizeof(attributes); - ssize_t bytes = 0; + ssize_t bytes; bool set = false; if (count < sizeof(attributes)) @@ -33,14 +33,9 @@ static ssize_t efivarfs_file_write(struct file *file, if (attributes & ~(EFI_VARIABLE_MASK)) return -EINVAL; - data = kmalloc(datasize, GFP_KERNEL); - if (!data) - return -ENOMEM; - - if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) { - bytes = -EFAULT; - goto out; - } + data = memdup_user(userbuf + sizeof(attributes), datasize); + if (IS_ERR(data)) + return PTR_ERR(data); bytes = efivar_entry_set_get_size(var, attributes, &datasize, data, &set); diff --git a/fs/exec.c b/fs/exec.c index 3d78fccdd72..4f59402fdda 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1619,9 +1619,9 @@ SYSCALL_DEFINE3(execve, return do_execve(getname(filename), argv, envp); } #ifdef CONFIG_COMPAT -asmlinkage long compat_sys_execve(const char __user * filename, - const compat_uptr_t __user * argv, - const compat_uptr_t __user * envp) +COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, + const compat_uptr_t __user *, argv, + const compat_uptr_t __user *, envp) { return compat_do_execve(getname(filename), argv, envp); } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6e39895a91b..24bfd7ff304 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -38,6 +38,7 @@ #include <linux/slab.h> #include <linux/ratelimit.h> #include <linux/aio.h> +#include <linux/bitops.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -3921,18 +3922,20 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) void ext4_set_inode_flags(struct inode *inode) { unsigned int flags = EXT4_I(inode)->i_flags; + unsigned int new_fl = 0; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); if (flags & EXT4_SYNC_FL) - inode->i_flags |= S_SYNC; + new_fl |= S_SYNC; if (flags & EXT4_APPEND_FL) - inode->i_flags |= S_APPEND; + new_fl |= S_APPEND; if (flags & EXT4_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + new_fl |= S_IMMUTABLE; if (flags & EXT4_NOATIME_FL) - inode->i_flags |= S_NOATIME; + new_fl |= S_NOATIME; if (flags & EXT4_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; + new_fl |= S_DIRSYNC; + set_mask_bits(&inode->i_flags, + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); } /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ diff --git a/fs/file.c b/fs/file.c index db25c2bdfe4..b61293badfb 100644 --- a/fs/file.c +++ b/fs/file.c @@ -497,7 +497,7 @@ repeat: error = fd; #if 1 /* Sanity check */ - if (rcu_dereference_raw(fdt->fd[fd]) != NULL) { + if (rcu_access_pointer(fdt->fd[fd]) != NULL) { printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); rcu_assign_pointer(fdt->fd[fd], NULL); } @@ -683,35 +683,54 @@ EXPORT_SYMBOL(fget_raw); * The fput_needed flag returned by fget_light should be passed to the * corresponding fput_light. */ -struct file *__fget_light(unsigned int fd, fmode_t mask, int *fput_needed) +static unsigned long __fget_light(unsigned int fd, fmode_t mask) { struct files_struct *files = current->files; struct file *file; - *fput_needed = 0; if (atomic_read(&files->count) == 1) { file = __fcheck_files(files, fd); - if (file && (file->f_mode & mask)) - file = NULL; + if (!file || unlikely(file->f_mode & mask)) + return 0; + return (unsigned long)file; } else { file = __fget(fd, mask); - if (file) - *fput_needed = 1; + if (!file) + return 0; + return FDPUT_FPUT | (unsigned long)file; } - - return file; } -struct file *fget_light(unsigned int fd, int *fput_needed) +unsigned long __fdget(unsigned int fd) { - return __fget_light(fd, FMODE_PATH, fput_needed); + return __fget_light(fd, FMODE_PATH); } -EXPORT_SYMBOL(fget_light); +EXPORT_SYMBOL(__fdget); -struct file *fget_raw_light(unsigned int fd, int *fput_needed) +unsigned long __fdget_raw(unsigned int fd) { - return __fget_light(fd, 0, fput_needed); + return __fget_light(fd, 0); } +unsigned long __fdget_pos(unsigned int fd) +{ + unsigned long v = __fdget(fd); + struct file *file = (struct file *)(v & ~3); + + if (file && (file->f_mode & FMODE_ATOMIC_POS)) { + if (file_count(file) > 1) { + v |= FDPUT_POS_UNLOCK; + mutex_lock(&file->f_pos_lock); + } + } + return v; +} + +/* + * We only lock f_pos if we have threads or if the file might be + * shared with another process. In both cases we'll have an elevated + * file count (done either by fdget() or by fork()). + */ + void set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; diff --git a/fs/file_table.c b/fs/file_table.c index 5fff9030be3..5b24008ea4f 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -135,6 +135,7 @@ struct file *get_empty_filp(void) atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); spin_lock_init(&f->f_lock); + mutex_init(&f->f_pos_lock); eventpoll_init_file(f); /* f->f_version: 0 */ return f; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e0259a163f9..d754e3cf99a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -40,18 +40,13 @@ struct wb_writeback_work { long nr_pages; struct super_block *sb; - /* - * Write only inodes dirtied before this time. Don't forget to set - * older_than_this_is_set when you set this. - */ - unsigned long older_than_this; + unsigned long *older_than_this; enum writeback_sync_modes sync_mode; unsigned int tagged_writepages:1; unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ - unsigned int older_than_this_is_set:1; enum wb_reason reason; /* why was writeback initiated? */ struct list_head list; /* pending work list */ @@ -252,10 +247,10 @@ static int move_expired_inodes(struct list_head *delaying_queue, int do_sb_sort = 0; int moved = 0; - WARN_ON_ONCE(!work->older_than_this_is_set); while (!list_empty(delaying_queue)) { inode = wb_inode(delaying_queue->prev); - if (inode_dirtied_after(inode, work->older_than_this)) + if (work->older_than_this && + inode_dirtied_after(inode, *work->older_than_this)) break; list_move(&inode->i_wb_list, &tmp); moved++; @@ -742,8 +737,6 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, .sync_mode = WB_SYNC_NONE, .range_cyclic = 1, .reason = reason, - .older_than_this = jiffies, - .older_than_this_is_set = 1, }; spin_lock(&wb->list_lock); @@ -802,13 +795,12 @@ static long wb_writeback(struct bdi_writeback *wb, { unsigned long wb_start = jiffies; long nr_pages = work->nr_pages; + unsigned long oldest_jif; struct inode *inode; long progress; - if (!work->older_than_this_is_set) { - work->older_than_this = jiffies; - work->older_than_this_is_set = 1; - } + oldest_jif = jiffies; + work->older_than_this = &oldest_jif; spin_lock(&wb->list_lock); for (;;) { @@ -842,10 +834,10 @@ static long wb_writeback(struct bdi_writeback *wb, * safe. */ if (work->for_kupdate) { - work->older_than_this = jiffies - + oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); } else if (work->for_background) - work->older_than_this = jiffies; + oldest_jif = jiffies; trace_writeback_start(wb->bdi, work); if (list_empty(&wb->b_io)) @@ -1357,21 +1349,18 @@ EXPORT_SYMBOL(try_to_writeback_inodes_sb); /** * sync_inodes_sb - sync sb inode pages - * @sb: the superblock - * @older_than_this: timestamp + * @sb: the superblock * * This function writes and waits on any dirty inode belonging to this - * superblock that has been dirtied before given timestamp. + * super_block. */ -void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this) +void sync_inodes_sb(struct super_block *sb) { DECLARE_COMPLETION_ONSTACK(done); struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, - .older_than_this = older_than_this, - .older_than_this_is_set = 1, .range_cyclic = 0, .done = &done, .reason = WB_REASON_SYNC, diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 968ce411db5..32602c667b4 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -103,6 +103,8 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, folder = &entry->folder; memset(folder, 0, sizeof(*folder)); folder->type = cpu_to_be16(HFSPLUS_FOLDER); + if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) + folder->flags |= cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT); folder->id = cpu_to_be32(inode->i_ino); HFSPLUS_I(inode)->create_date = folder->create_date = @@ -203,6 +205,36 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, return hfs_brec_find(fd, hfs_find_rec_by_key); } +static void hfsplus_subfolders_inc(struct inode *dir) +{ + struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); + + if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { + /* + * Increment subfolder count. Note, the value is only meaningful + * for folders with HFSPLUS_HAS_FOLDER_COUNT flag set. + */ + HFSPLUS_I(dir)->subfolders++; + } +} + +static void hfsplus_subfolders_dec(struct inode *dir) +{ + struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); + + if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { + /* + * Decrement subfolder count. Note, the value is only meaningful + * for folders with HFSPLUS_HAS_FOLDER_COUNT flag set. + * + * Check for zero. Some subfolders may have been created + * by an implementation ignorant of this counter. + */ + if (HFSPLUS_I(dir)->subfolders) + HFSPLUS_I(dir)->subfolders--; + } +} + int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) { @@ -247,6 +279,8 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, goto err1; dir->i_size++; + if (S_ISDIR(inode->i_mode)) + hfsplus_subfolders_inc(dir); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); @@ -336,6 +370,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) goto out; dir->i_size--; + if (type == HFSPLUS_FOLDER) + hfsplus_subfolders_dec(dir); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); @@ -380,6 +416,7 @@ int hfsplus_rename_cat(u32 cnid, hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset, src_fd.entrylength); + type = be16_to_cpu(entry.type); /* create new dir entry with the data from the old entry */ hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); @@ -394,6 +431,8 @@ int hfsplus_rename_cat(u32 cnid, if (err) goto out; dst_dir->i_size++; + if (type == HFSPLUS_FOLDER) + hfsplus_subfolders_inc(dst_dir); dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC; /* finally remove the old entry */ @@ -405,6 +444,8 @@ int hfsplus_rename_cat(u32 cnid, if (err) goto out; src_dir->i_size--; + if (type == HFSPLUS_FOLDER) + hfsplus_subfolders_dec(src_dir); src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC; /* remove old thread entry */ diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 08846425b67..62d571eb69b 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -242,6 +242,7 @@ struct hfsplus_inode_info { */ sector_t fs_blocks; u8 userflags; /* BSD user file flags */ + u32 subfolders; /* Subfolder count (HFSX only) */ struct list_head open_dir_list; loff_t phys_size; diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 8ffb3a8ffe7..5a126828d85 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h @@ -261,7 +261,7 @@ struct hfsplus_cat_folder { struct DInfo user_info; struct DXInfo finder_info; __be32 text_encoding; - u32 reserved; + __be32 subfolders; /* Subfolder count in HFSX. Reserved in HFS+. */ } __packed; /* HFS file info (stolen from hfs.h) */ @@ -301,11 +301,13 @@ struct hfsplus_cat_file { struct hfsplus_fork_raw rsrc_fork; } __packed; -/* File attribute bits */ +/* File and folder flag bits */ #define HFSPLUS_FILE_LOCKED 0x0001 #define HFSPLUS_FILE_THREAD_EXISTS 0x0002 #define HFSPLUS_XATTR_EXISTS 0x0004 #define HFSPLUS_ACL_EXISTS 0x0008 +#define HFSPLUS_HAS_FOLDER_COUNT 0x0010 /* Folder has subfolder count + * (HFSX only) */ /* HFS+ catalog thread (part of a cat_entry) */ struct hfsplus_cat_thread { diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index fa929f325f8..a4f45bd88a6 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -375,6 +375,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode) hip->extent_state = 0; hip->flags = 0; hip->userflags = 0; + hip->subfolders = 0; memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); hip->alloc_blocks = 0; @@ -494,6 +495,10 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); HFSPLUS_I(inode)->create_date = folder->create_date; HFSPLUS_I(inode)->fs_blocks = 0; + if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { + HFSPLUS_I(inode)->subfolders = + be32_to_cpu(folder->subfolders); + } inode->i_op = &hfsplus_dir_inode_operations; inode->i_fop = &hfsplus_dir_operations; } else if (type == HFSPLUS_FILE) { @@ -566,6 +571,10 @@ int hfsplus_cat_write_inode(struct inode *inode) folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); folder->valence = cpu_to_be32(inode->i_size - 2); + if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { + folder->subfolders = + cpu_to_be32(HFSPLUS_I(inode)->subfolders); + } hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_folder)); } else if (HFSPLUS_IS_RSRC(inode)) { diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 968eab5bc1f..68537e8b7a0 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -75,7 +75,7 @@ int hfsplus_parse_options_remount(char *input, int *force) int token; if (!input) - return 0; + return 1; while ((p = strsep(&input, ",")) != NULL) { if (!*p) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 0d6ce895a9e..0f4152defe7 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -94,6 +94,7 @@ const void *kernfs_super_ns(struct super_block *sb) * @fs_type: file_system_type of the fs being mounted * @flags: mount flags specified for the mount * @root: kernfs_root of the hierarchy being mounted + * @new_sb_created: tell the caller if we allocated a new superblock * @ns: optional namespace tag of the mount * * This is to be called from each kernfs user's file_system_type->mount() @@ -104,7 +105,8 @@ const void *kernfs_super_ns(struct super_block *sb) * The return value can be passed to the vfs layer verbatim. */ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, const void *ns) + struct kernfs_root *root, bool *new_sb_created, + const void *ns) { struct super_block *sb; struct kernfs_super_info *info; @@ -122,6 +124,10 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, kfree(info); if (IS_ERR(sb)) return ERR_CAST(sb); + + if (new_sb_created) + *new_sb_created = !sb->s_root; + if (!sb->s_root) { error = kernfs_fill_super(sb); if (error) { diff --git a/fs/mount.h b/fs/mount.h index a17458ca6f2..b29e42f05f3 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -19,13 +19,13 @@ struct mnt_pcp { }; struct mountpoint { - struct list_head m_hash; + struct hlist_node m_hash; struct dentry *m_dentry; int m_count; }; struct mount { - struct list_head mnt_hash; + struct hlist_node mnt_hash; struct mount *mnt_parent; struct dentry *mnt_mountpoint; struct vfsmount mnt; diff --git a/fs/namei.c b/fs/namei.c index 385f7817bfc..4b491b43199 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1109,7 +1109,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, return false; if (!d_mountpoint(path->dentry)) - break; + return true; mounted = __lookup_mnt(path->mnt, path->dentry); if (!mounted) @@ -1125,20 +1125,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, */ *inode = path->dentry->d_inode; } - return true; -} - -static void follow_mount_rcu(struct nameidata *nd) -{ - while (d_mountpoint(nd->path.dentry)) { - struct mount *mounted; - mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); - if (!mounted) - break; - nd->path.mnt = &mounted->mnt; - nd->path.dentry = mounted->mnt.mnt_root; - nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); - } + return read_seqretry(&mount_lock, nd->m_seq); } static int follow_dotdot_rcu(struct nameidata *nd) @@ -1166,7 +1153,17 @@ static int follow_dotdot_rcu(struct nameidata *nd) break; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } - follow_mount_rcu(nd); + while (d_mountpoint(nd->path.dentry)) { + struct mount *mounted; + mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); + if (!mounted) + break; + nd->path.mnt = &mounted->mnt; + nd->path.dentry = mounted->mnt.mnt_root; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); + if (!read_seqretry(&mount_lock, nd->m_seq)) + goto failed; + } nd->inode = nd->path.dentry->d_inode; return 0; @@ -1884,7 +1881,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = f.file->f_path; if (flags & LOOKUP_RCU) { - if (f.need_put) + if (f.flags & FDPUT_FPUT) *fp = f.file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); rcu_read_lock(); diff --git a/fs/namespace.c b/fs/namespace.c index 22e536705c4..2ffc5a2905d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -23,11 +23,34 @@ #include <linux/uaccess.h> #include <linux/proc_ns.h> #include <linux/magic.h> +#include <linux/bootmem.h> #include "pnode.h" #include "internal.h" -#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) -#define HASH_SIZE (1UL << HASH_SHIFT) +static unsigned int m_hash_mask __read_mostly; +static unsigned int m_hash_shift __read_mostly; +static unsigned int mp_hash_mask __read_mostly; +static unsigned int mp_hash_shift __read_mostly; + +static __initdata unsigned long mhash_entries; +static int __init set_mhash_entries(char *str) +{ + if (!str) + return 0; + mhash_entries = simple_strtoul(str, &str, 0); + return 1; +} +__setup("mhash_entries=", set_mhash_entries); + +static __initdata unsigned long mphash_entries; +static int __init set_mphash_entries(char *str) +{ + if (!str) + return 0; + mphash_entries = simple_strtoul(str, &str, 0); + return 1; +} +__setup("mphash_entries=", set_mphash_entries); static int event; static DEFINE_IDA(mnt_id_ida); @@ -36,8 +59,8 @@ static DEFINE_SPINLOCK(mnt_id_lock); static int mnt_id_start = 0; static int mnt_group_start = 1; -static struct list_head *mount_hashtable __read_mostly; -static struct list_head *mountpoint_hashtable __read_mostly; +static struct hlist_head *mount_hashtable __read_mostly; +static struct hlist_head *mountpoint_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; static DECLARE_RWSEM(namespace_sem); @@ -55,12 +78,19 @@ EXPORT_SYMBOL_GPL(fs_kobj); */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); -static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) +static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); tmp += ((unsigned long)dentry / L1_CACHE_BYTES); - tmp = tmp + (tmp >> HASH_SHIFT); - return tmp & (HASH_SIZE - 1); + tmp = tmp + (tmp >> m_hash_shift); + return &mount_hashtable[tmp & m_hash_mask]; +} + +static inline struct hlist_head *mp_hash(struct dentry *dentry) +{ + unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES); + tmp = tmp + (tmp >> mp_hash_shift); + return &mountpoint_hashtable[tmp & mp_hash_mask]; } /* @@ -187,7 +217,7 @@ static struct mount *alloc_vfsmnt(const char *name) mnt->mnt_writers = 0; #endif - INIT_LIST_HEAD(&mnt->mnt_hash); + INIT_HLIST_NODE(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); @@ -575,10 +605,10 @@ bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) */ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) { - struct list_head *head = mount_hashtable + hash(mnt, dentry); + struct hlist_head *head = m_hash(mnt, dentry); struct mount *p; - list_for_each_entry_rcu(p, head, mnt_hash) + hlist_for_each_entry_rcu(p, head, mnt_hash) if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) return p; return NULL; @@ -590,13 +620,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) */ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) { - struct list_head *head = mount_hashtable + hash(mnt, dentry); - struct mount *p; - - list_for_each_entry_reverse(p, head, mnt_hash) - if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) - return p; - return NULL; + struct mount *p, *res; + res = p = __lookup_mnt(mnt, dentry); + if (!p) + goto out; + hlist_for_each_entry_continue(p, mnt_hash) { + if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) + break; + res = p; + } +out: + return res; } /* @@ -633,11 +667,11 @@ struct vfsmount *lookup_mnt(struct path *path) static struct mountpoint *new_mountpoint(struct dentry *dentry) { - struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry); + struct hlist_head *chain = mp_hash(dentry); struct mountpoint *mp; int ret; - list_for_each_entry(mp, chain, m_hash) { + hlist_for_each_entry(mp, chain, m_hash) { if (mp->m_dentry == dentry) { /* might be worth a WARN_ON() */ if (d_unlinked(dentry)) @@ -659,7 +693,7 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry) mp->m_dentry = dentry; mp->m_count = 1; - list_add(&mp->m_hash, chain); + hlist_add_head(&mp->m_hash, chain); return mp; } @@ -670,7 +704,7 @@ static void put_mountpoint(struct mountpoint *mp) spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_MOUNTED; spin_unlock(&dentry->d_lock); - list_del(&mp->m_hash); + hlist_del(&mp->m_hash); kfree(mp); } } @@ -712,7 +746,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) mnt->mnt_parent = mnt; mnt->mnt_mountpoint = mnt->mnt.mnt_root; list_del_init(&mnt->mnt_child); - list_del_init(&mnt->mnt_hash); + hlist_del_init_rcu(&mnt->mnt_hash); put_mountpoint(mnt->mnt_mp); mnt->mnt_mp = NULL; } @@ -739,15 +773,14 @@ static void attach_mnt(struct mount *mnt, struct mountpoint *mp) { mnt_set_mountpoint(parent, mp, mnt); - list_add_tail(&mnt->mnt_hash, mount_hashtable + - hash(&parent->mnt, mp->m_dentry)); + hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); } /* * vfsmount lock must be held for write */ -static void commit_tree(struct mount *mnt) +static void commit_tree(struct mount *mnt, struct mount *shadows) { struct mount *parent = mnt->mnt_parent; struct mount *m; @@ -762,8 +795,11 @@ static void commit_tree(struct mount *mnt) list_splice(&head, n->list.prev); - list_add_tail(&mnt->mnt_hash, mount_hashtable + - hash(&parent->mnt, mnt->mnt_mountpoint)); + if (shadows) + hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash); + else + hlist_add_head_rcu(&mnt->mnt_hash, + m_hash(&parent->mnt, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); touch_mnt_namespace(n); } @@ -1153,26 +1189,28 @@ int may_umount(struct vfsmount *mnt) EXPORT_SYMBOL(may_umount); -static LIST_HEAD(unmounted); /* protected by namespace_sem */ +static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static void namespace_unlock(void) { struct mount *mnt; - LIST_HEAD(head); + struct hlist_head head = unmounted; - if (likely(list_empty(&unmounted))) { + if (likely(hlist_empty(&head))) { up_write(&namespace_sem); return; } - list_splice_init(&unmounted, &head); + head.first->pprev = &head.first; + INIT_HLIST_HEAD(&unmounted); + up_write(&namespace_sem); synchronize_rcu(); - while (!list_empty(&head)) { - mnt = list_first_entry(&head, struct mount, mnt_hash); - list_del_init(&mnt->mnt_hash); + while (!hlist_empty(&head)) { + mnt = hlist_entry(head.first, struct mount, mnt_hash); + hlist_del_init(&mnt->mnt_hash); if (mnt->mnt_ex_mountpoint.mnt) path_put(&mnt->mnt_ex_mountpoint); mntput(&mnt->mnt); @@ -1193,16 +1231,19 @@ static inline void namespace_lock(void) */ void umount_tree(struct mount *mnt, int how) { - LIST_HEAD(tmp_list); + HLIST_HEAD(tmp_list); struct mount *p; + struct mount *last = NULL; - for (p = mnt; p; p = next_mnt(p, mnt)) - list_move(&p->mnt_hash, &tmp_list); + for (p = mnt; p; p = next_mnt(p, mnt)) { + hlist_del_init_rcu(&p->mnt_hash); + hlist_add_head(&p->mnt_hash, &tmp_list); + } if (how) propagate_umount(&tmp_list); - list_for_each_entry(p, &tmp_list, mnt_hash) { + hlist_for_each_entry(p, &tmp_list, mnt_hash) { list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); @@ -1220,8 +1261,13 @@ void umount_tree(struct mount *mnt, int how) p->mnt_mp = NULL; } change_mnt_propagation(p, MS_PRIVATE); + last = p; + } + if (last) { + last->mnt_hash.next = unmounted.first; + unmounted.first = tmp_list.first; + unmounted.first->pprev = &unmounted.first; } - list_splice(&tmp_list, &unmounted); } static void shrink_submounts(struct mount *mnt); @@ -1605,24 +1651,23 @@ static int attach_recursive_mnt(struct mount *source_mnt, struct mountpoint *dest_mp, struct path *parent_path) { - LIST_HEAD(tree_list); + HLIST_HEAD(tree_list); struct mount *child, *p; + struct hlist_node *n; int err; if (IS_MNT_SHARED(dest_mnt)) { err = invent_group_ids(source_mnt, true); if (err) goto out; - } - err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); - if (err) - goto out_cleanup_ids; - - lock_mount_hash(); - - if (IS_MNT_SHARED(dest_mnt)) { + err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); + if (err) + goto out_cleanup_ids; + lock_mount_hash(); for (p = source_mnt; p; p = next_mnt(p, source_mnt)) set_mnt_shared(p); + } else { + lock_mount_hash(); } if (parent_path) { detach_mnt(source_mnt, parent_path); @@ -1630,20 +1675,22 @@ static int attach_recursive_mnt(struct mount *source_mnt, touch_mnt_namespace(source_mnt->mnt_ns); } else { mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); - commit_tree(source_mnt); + commit_tree(source_mnt, NULL); } - list_for_each_entry_safe(child, p, &tree_list, mnt_hash) { - list_del_init(&child->mnt_hash); - commit_tree(child); + hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { + struct mount *q; + hlist_del_init(&child->mnt_hash); + q = __lookup_mnt_last(&child->mnt_parent->mnt, + child->mnt_mountpoint); + commit_tree(child, q); } unlock_mount_hash(); return 0; out_cleanup_ids: - if (IS_MNT_SHARED(dest_mnt)) - cleanup_group_ids(source_mnt, NULL); + cleanup_group_ids(source_mnt, NULL); out: return err; } @@ -2777,18 +2824,24 @@ void __init mnt_init(void) mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); - mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); + mount_hashtable = alloc_large_system_hash("Mount-cache", + sizeof(struct hlist_head), + mhash_entries, 19, + 0, + &m_hash_shift, &m_hash_mask, 0, 0); + mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache", + sizeof(struct hlist_head), + mphash_entries, 19, + 0, + &mp_hash_shift, &mp_hash_mask, 0, 0); if (!mount_hashtable || !mountpoint_hashtable) panic("Failed to allocate mount hash table\n"); - printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); - - for (u = 0; u < HASH_SIZE; u++) - INIT_LIST_HEAD(&mount_hashtable[u]); - for (u = 0; u < HASH_SIZE; u++) - INIT_LIST_HEAD(&mountpoint_hashtable[u]); + for (u = 0; u <= m_hash_mask; u++) + INIT_HLIST_HEAD(&mount_hashtable[u]); + for (u = 0; u <= mp_hash_mask; u++) + INIT_HLIST_HEAD(&mountpoint_hashtable[u]); kernfs_init(); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ef792f29f83..5d8ccecf5f5 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -659,16 +659,19 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation == NULL) + goto out_enoent; - if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) { - rcu_read_unlock(); - return -ENOENT; - } + if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) + goto out_enoent; nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); nfs_delegation_run_state_manager(clp); return 0; +out_enoent: + rcu_read_unlock(); + return -ENOENT; } static struct inode * diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 28a0a3cbd3b..360114ae8b8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -164,17 +164,16 @@ static void nfs_zap_caches_locked(struct inode *inode) if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { nfs_fscache_invalidate(inode); nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_LABEL | NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_PAGECACHE; } else nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_LABEL | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_PAGECACHE; + nfs_zap_label_cache_locked(nfsi); } void nfs_zap_caches(struct inode *inode) @@ -266,6 +265,13 @@ nfs_init_locked(struct inode *inode, void *opaque) } #ifdef CONFIG_NFS_V4_SECURITY_LABEL +static void nfs_clear_label_invalid(struct inode *inode) +{ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_LABEL; + spin_unlock(&inode->i_lock); +} + void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, struct nfs4_label *label) { @@ -283,6 +289,7 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, __func__, (char *)label->label, label->len, error); + nfs_clear_label_invalid(inode); } } @@ -1648,7 +1655,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blocks = fattr->du.nfs2.blocks; /* Update attrtimeo value if we're out of the unstable period */ - if (invalid & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) { + if (invalid & NFS_INO_INVALID_ATTR) { nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; @@ -1661,7 +1668,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } } invalid &= ~NFS_INO_INVALID_ATTR; - invalid &= ~NFS_INO_INVALID_LABEL; /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8b5cc04a861..b46cf5a6732 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -176,7 +176,8 @@ extern struct nfs_server *nfs4_create_server( extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, - struct sockaddr *sap, size_t salen); + struct sockaddr *sap, size_t salen, + struct net *net); extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, @@ -279,9 +280,18 @@ static inline void nfs4_label_free(struct nfs4_label *label) } return; } + +static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) +{ + if (nfs_server_capable(&nfsi->vfs_inode, NFS_CAP_SECURITY_LABEL)) + nfsi->cache_validity |= NFS_INO_INVALID_LABEL; +} #else static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } static inline void nfs4_label_free(void *label) {} +static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) +{ +} #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ /* proc.c */ diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index aa9bc973f36..a462ef0fb5d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -18,6 +18,7 @@ #include <linux/lockd/bind.h> #include <linux/nfs_mount.h> #include <linux/freezer.h> +#include <linux/xattr.h> #include "iostat.h" #include "internal.h" diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 860ad26a559..0e46d3d1b6c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1135,6 +1135,7 @@ static int nfs_probe_destination(struct nfs_server *server) * @hostname: new end-point's hostname * @sap: new end-point's socket address * @salen: size of "sap" + * @net: net namespace * * The nfs_server must be quiescent before this function is invoked. * Either its session is drained (NFSv4.1+), or its transport is @@ -1143,13 +1144,13 @@ static int nfs_probe_destination(struct nfs_server *server) * Returns zero on success, or a negative errno value. */ int nfs4_update_server(struct nfs_server *server, const char *hostname, - struct sockaddr *sap, size_t salen) + struct sockaddr *sap, size_t salen, struct net *net) { struct nfs_client *clp = server->nfs_client; struct rpc_clnt *clnt = server->client; struct xprt_create xargs = { .ident = clp->cl_proto, - .net = &init_net, + .net = net, .dstaddr = sap, .addrlen = salen, .servername = hostname, @@ -1189,7 +1190,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, error = nfs4_set_client(server, hostname, sap, salen, buf, clp->cl_rpcclient->cl_auth->au_flavor, clp->cl_proto, clnt->cl_timeout, - clp->cl_minorversion, clp->cl_net); + clp->cl_minorversion, net); nfs_put_client(clp); if (error != 0) { nfs_server_insert_lists(server); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 12c8132ad40..b9a35c05b60 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -324,8 +324,9 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) &rdata->res.seq_res, task)) return; - nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, - rdata->args.lock_context, FMODE_READ); + if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, + rdata->args.lock_context, FMODE_READ) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_read_call_done(struct rpc_task *task, void *data) @@ -435,8 +436,9 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) &wdata->res.seq_res, task)) return; - nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, - wdata->args.lock_context, FMODE_WRITE); + if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, + wdata->args.lock_context, FMODE_WRITE) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_write_call_done(struct rpc_task *task, void *data) diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 4e7f05d3e9d..3d5dbf80d46 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -121,9 +121,8 @@ static int nfs4_validate_fspath(struct dentry *dentry, } static size_t nfs_parse_server_name(char *string, size_t len, - struct sockaddr *sa, size_t salen, struct nfs_server *server) + struct sockaddr *sa, size_t salen, struct net *net) { - struct net *net = rpc_net_ns(server->client); ssize_t ret; ret = rpc_pton(net, string, len, sa, salen); @@ -223,6 +222,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, const struct nfs4_fs_location *location) { const size_t addr_bufsize = sizeof(struct sockaddr_storage); + struct net *net = rpc_net_ns(NFS_SB(mountdata->sb)->client); struct vfsmount *mnt = ERR_PTR(-ENOENT); char *mnt_path; unsigned int maxbuflen; @@ -248,8 +248,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, continue; mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, - mountdata->addr, addr_bufsize, - NFS_SB(mountdata->sb)); + mountdata->addr, addr_bufsize, net); if (mountdata->addrlen == 0) continue; @@ -419,6 +418,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, const struct nfs4_fs_location *location) { const size_t addr_bufsize = sizeof(struct sockaddr_storage); + struct net *net = rpc_net_ns(server->client); struct sockaddr *sap; unsigned int s; size_t salen; @@ -440,7 +440,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, continue; salen = nfs_parse_server_name(buf->data, buf->len, - sap, addr_bufsize, server); + sap, addr_bufsize, net); if (salen == 0) continue; rpc_set_port(sap, NFS_PORT); @@ -450,7 +450,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, if (hostname == NULL) break; - error = nfs4_update_server(server, hostname, sap, salen); + error = nfs4_update_server(server, hostname, sap, salen, net); kfree(hostname); if (error == 0) break; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2da6a698b8f..450bfedbe2f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2398,13 +2398,16 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) { /* Use that stateid */ - } else if (truncate && state != NULL && nfs4_valid_open_stateid(state)) { + } else if (truncate && state != NULL) { struct nfs_lockowner lockowner = { .l_owner = current->files, .l_pid = current->tgid, }; - nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, - &lockowner); + if (!nfs4_valid_open_stateid(state)) + return -EBADF; + if (nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, + &lockowner) == -EIO) + return -EBADF; } else nfs4_stateid_copy(&arg.stateid, &zero_stateid); @@ -4011,8 +4014,9 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid, { nfs4_stateid current_stateid; - if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode)) - return false; + /* If the current stateid represents a lost lock, then exit */ + if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode) == -EIO) + return true; return nfs4_stateid_match(stateid, ¤t_stateid); } @@ -5828,8 +5832,7 @@ struct nfs_release_lockowner_data { struct nfs4_lock_state *lsp; struct nfs_server *server; struct nfs_release_lockowner_args args; - struct nfs4_sequence_args seq_args; - struct nfs4_sequence_res seq_res; + struct nfs_release_lockowner_res res; unsigned long timestamp; }; @@ -5837,7 +5840,7 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata { struct nfs_release_lockowner_data *data = calldata; nfs40_setup_sequence(data->server, - &data->seq_args, &data->seq_res, task); + &data->args.seq_args, &data->res.seq_res, task); data->timestamp = jiffies; } @@ -5846,7 +5849,7 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) struct nfs_release_lockowner_data *data = calldata; struct nfs_server *server = data->server; - nfs40_sequence_done(task, &data->seq_res); + nfs40_sequence_done(task, &data->res.seq_res); switch (task->tk_status) { case 0: @@ -5887,7 +5890,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st data = kmalloc(sizeof(*data), GFP_NOFS); if (!data) return -ENOMEM; - nfs4_init_sequence(&data->seq_args, &data->seq_res, 0); data->lsp = lsp; data->server = server; data->args.lock_owner.clientid = server->nfs_client->cl_clientid; @@ -5895,6 +5897,8 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st data->args.lock_owner.s_dev = server->s_dev; msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); return 0; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e5be72518bd..0deb32105cc 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -974,9 +974,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { nfs4_stateid_copy(dst, &lsp->ls_stateid); ret = 0; - smp_rmb(); - if (!list_empty(&lsp->ls_seqid.list)) - ret = -EWOULDBLOCK; } spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); @@ -984,10 +981,9 @@ out: return ret; } -static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { const nfs4_stateid *src; - int ret; int seq; do { @@ -996,12 +992,7 @@ static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) if (test_bit(NFS_OPEN_STATE, &state->flags)) src = &state->open_stateid; nfs4_stateid_copy(dst, src); - ret = 0; - smp_rmb(); - if (!list_empty(&state->owner->so_seqid.list)) - ret = -EWOULDBLOCK; } while (read_seqretry(&state->seqlock, seq)); - return ret; } /* @@ -1015,15 +1006,19 @@ int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, if (ret == -EIO) /* A lost lock - don't even consider delegations */ goto out; - if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) + /* returns true if delegation stateid found and copied */ + if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) { + ret = 0; goto out; + } if (ret != -ENOENT) /* nfs4_copy_delegation_stateid() didn't over-write * dst, so it still has the lock stateid which we now * choose to use. */ goto out; - ret = nfs4_copy_open_stateid(dst, state); + nfs4_copy_open_stateid(dst, state); + ret = 0; out: if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) dst->seqid = 0; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 017d3cb5e99..6d7be3f8035 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -449,6 +449,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, fh_lock(fhp); host_err = notify_change(dentry, iap, NULL); fh_unlock(fhp); + err = nfserrno(host_err); out_put_write_access: if (size_change) diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 0b9ff4395e6..abc8cbcfe90 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -86,7 +86,7 @@ static int dnotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name) + const unsigned char *file_name, u32 cookie) { struct dnotify_mark *dn_mark; struct dnotify_struct *dn; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 0e792f5e314..dc638f786d5 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -147,7 +147,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *fanotify_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name) + const unsigned char *file_name, u32 cookie) { int ret = 0; struct fanotify_event_info *event; @@ -192,10 +192,12 @@ static int fanotify_handle_event(struct fsnotify_group *group, ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge); if (ret) { - BUG_ON(mask & FAN_ALL_PERM_EVENTS); + /* Permission events shouldn't be merged */ + BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS); /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); - ret = 0; + + return 0; } #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index b6175fa11bf..287a22c0414 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -698,6 +698,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) struct fsnotify_group *group; int f_flags, fd; struct user_struct *user; + struct fanotify_event_info *oevent; pr_debug("%s: flags=%d event_f_flags=%d\n", __func__, flags, event_f_flags); @@ -730,8 +731,20 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.user = user; atomic_inc(&user->fanotify_listeners); + oevent = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); + if (unlikely(!oevent)) { + fd = -ENOMEM; + goto out_destroy_group; + } + group->overflow_event = &oevent->fse; + fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); + oevent->tgid = get_pid(task_tgid(current)); + oevent->path.mnt = NULL; + oevent->path.dentry = NULL; + group->fanotify_data.f_flags = event_f_flags; #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + oevent->response = 0; mutex_init(&group->fanotify_data.access_mutex); init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 1d4e1ea2f37..9d3e9c50066 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -179,7 +179,7 @@ static int send_to_group(struct inode *to_tell, return group->ops->handle_event(group, to_tell, inode_mark, vfsmount_mark, mask, data, data_is, - file_name); + file_name, cookie); } /* diff --git a/fs/notify/group.c b/fs/notify/group.c index ee674fe2cec..ad199598045 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -55,6 +55,13 @@ void fsnotify_destroy_group(struct fsnotify_group *group) /* clear the notification queue of all events */ fsnotify_flush_notify(group); + /* + * Destroy overflow event (we cannot use fsnotify_destroy_event() as + * that deliberately ignores overflow events. + */ + if (group->overflow_event) + group->ops->free_event(group->overflow_event); + fsnotify_put_group(group); } @@ -99,7 +106,6 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) INIT_LIST_HEAD(&group->marks_list); group->ops = ops; - fsnotify_init_event(&group->overflow_event, NULL, FS_Q_OVERFLOW); return group; } diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 485eef3f440..ed855ef6f07 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -27,6 +27,6 @@ extern int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name); + const unsigned char *file_name, u32 cookie); extern const struct fsnotify_ops inotify_fsnotify_ops; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index d5ee56348bb..43ab1e1a07a 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -67,7 +67,7 @@ int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name) + const unsigned char *file_name, u32 cookie) { struct inotify_inode_mark *i_mark; struct inotify_event_info *event; @@ -103,6 +103,7 @@ int inotify_handle_event(struct fsnotify_group *group, fsn_event = &event->fse; fsnotify_init_event(fsn_event, inode, mask); event->wd = i_mark->wd; + event->sync_cookie = cookie; event->name_len = len; if (len) strcpy(event->name, file_name); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 497395c8274..78a2ca3966c 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -495,7 +495,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, /* Queue ignore event for the watch */ inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED, - NULL, FSNOTIFY_EVENT_NONE, NULL); + NULL, FSNOTIFY_EVENT_NONE, NULL, 0); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ @@ -633,11 +633,23 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod static struct fsnotify_group *inotify_new_group(unsigned int max_events) { struct fsnotify_group *group; + struct inotify_event_info *oevent; group = fsnotify_alloc_group(&inotify_fsnotify_ops); if (IS_ERR(group)) return group; + oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL); + if (unlikely(!oevent)) { + fsnotify_destroy_group(group); + return ERR_PTR(-ENOMEM); + } + group->overflow_event = &oevent->fse; + fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); + oevent->wd = -1; + oevent->sync_cookie = 0; + oevent->name_len = 0; + group->max_events = max_events; spin_lock_init(&group->inotify_data.idr_lock); diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 18b3c4427dc..1e58402171a 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -80,7 +80,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group, /* * Add an event to the group notification queue. The group can later pull this * event off the queue to deal with. The function returns 0 if the event was - * added to the queue, 1 if the event was merged with some other queued event. + * added to the queue, 1 if the event was merged with some other queued event, + * 2 if the queue of events has overflown. */ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, @@ -95,10 +96,14 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, mutex_lock(&group->notification_mutex); if (group->q_len >= group->max_events) { + ret = 2; /* Queue overflow event only if it isn't already queued */ - if (list_empty(&group->overflow_event.list)) - event = &group->overflow_event; - ret = 1; + if (!list_empty(&group->overflow_event->list)) { + mutex_unlock(&group->notification_mutex); + return ret; + } + event = group->overflow_event; + goto queue; } if (!list_empty(list) && merge) { @@ -109,6 +114,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, } } +queue: group->q_len++; list_add_tail(&event->list, list); mutex_unlock(&group->notification_mutex); @@ -132,7 +138,11 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group event = list_first_entry(&group->notification_list, struct fsnotify_event, list); - list_del(&event->list); + /* + * We need to init list head for the case of overflow event so that + * check in fsnotify_add_notify_events() works + */ + list_del_init(&event->list); group->q_len--; return event; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8450262bcf2..51632c40e89 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2393,8 +2393,8 @@ out_dio: if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || ((file->f_flags & O_DIRECT) && !direct_io)) { - ret = filemap_fdatawrite_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawrite_range(file->f_mapping, *ppos, + *ppos + count - 1); if (ret < 0) written = ret; @@ -2407,8 +2407,8 @@ out_dio: } if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawait_range(file->f_mapping, *ppos, + *ppos + count - 1); } /* diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index aaa50611ec6..d7b5108789e 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -717,6 +717,12 @@ static int ocfs2_release_dquot(struct dquot *dquot) */ if (status < 0) mlog_errno(status); + /* + * Clear dq_off so that we search for the structure in quota file next + * time we acquire it. The structure might be deleted and reallocated + * elsewhere by another node while our dquot structure is on freelist. + */ + dquot->dq_off = 0; clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_trans: ocfs2_commit_trans(osb, handle); @@ -756,16 +762,17 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) status = ocfs2_lock_global_qf(info, 1); if (status < 0) goto out; - if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { - status = ocfs2_qinfo_lock(info, 0); - if (status < 0) - goto out_dq; - status = qtree_read_dquot(&info->dqi_gi, dquot); - ocfs2_qinfo_unlock(info, 0); - if (status < 0) - goto out_dq; - } - set_bit(DQ_READ_B, &dquot->dq_flags); + status = ocfs2_qinfo_lock(info, 0); + if (status < 0) + goto out_dq; + /* + * We always want to read dquot structure from disk because we don't + * know what happened with it while it was on freelist. + */ + status = qtree_read_dquot(&info->dqi_gi, dquot); + ocfs2_qinfo_unlock(info, 0); + if (status < 0) + goto out_dq; OCFS2_DQUOT(dquot)->dq_use_count++; OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 2e4344be3b9..2001862bf2b 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -1303,10 +1303,6 @@ int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); out: - /* Clear the read bit so that next time someone uses this - * dquot he reads fresh info from disk and allocates local - * dquot structure */ - clear_bit(DQ_READ_B, &dquot->dq_flags); return status; } diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 1324e6600e5..ca5ce14cbdd 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -346,7 +346,9 @@ int ocfs2_cluster_connect(const char *stack_name, strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1); new_conn->cc_namelen = grouplen; - strlcpy(new_conn->cc_cluster_name, cluster_name, CLUSTER_NAME_MAX + 1); + if (cluster_name_len) + strlcpy(new_conn->cc_cluster_name, cluster_name, + CLUSTER_NAME_MAX + 1); new_conn->cc_cluster_name_len = cluster_name_len; new_conn->cc_recovery_handler = recovery_handler; new_conn->cc_recovery_data = recovery_data; diff --git a/fs/open.c b/fs/open.c index 4b3e1edf2fe..b9ed8b25c10 100644 --- a/fs/open.c +++ b/fs/open.c @@ -705,6 +705,10 @@ static int do_dentry_open(struct file *f, return 0; } + /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ + if (S_ISREG(inode->i_mode)) + f->f_mode |= FMODE_ATOMIC_POS; + f->f_op = fops_get(inode->i_fop); if (unlikely(WARN_ON(!f->f_op))) { error = -ENODEV; diff --git a/fs/pnode.c b/fs/pnode.c index c7221bb1980..88396df725b 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -220,14 +220,14 @@ static struct mount *get_source(struct mount *dest, * @tree_list : list of heads of trees to be attached. */ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, - struct mount *source_mnt, struct list_head *tree_list) + struct mount *source_mnt, struct hlist_head *tree_list) { struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; struct mount *m, *child; int ret = 0; struct mount *prev_dest_mnt = dest_mnt; struct mount *prev_src_mnt = source_mnt; - LIST_HEAD(tmp_list); + HLIST_HEAD(tmp_list); for (m = propagation_next(dest_mnt, dest_mnt); m; m = propagation_next(m, dest_mnt)) { @@ -246,27 +246,29 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, child = copy_tree(source, source->mnt.mnt_root, type); if (IS_ERR(child)) { ret = PTR_ERR(child); - list_splice(tree_list, tmp_list.prev); + tmp_list = *tree_list; + tmp_list.first->pprev = &tmp_list.first; + INIT_HLIST_HEAD(tree_list); goto out; } if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) { mnt_set_mountpoint(m, dest_mp, child); - list_add_tail(&child->mnt_hash, tree_list); + hlist_add_head(&child->mnt_hash, tree_list); } else { /* * This can happen if the parent mount was bind mounted * on some subdirectory of a shared/slave mount. */ - list_add_tail(&child->mnt_hash, &tmp_list); + hlist_add_head(&child->mnt_hash, &tmp_list); } prev_dest_mnt = m; prev_src_mnt = child; } out: lock_mount_hash(); - while (!list_empty(&tmp_list)) { - child = list_first_entry(&tmp_list, struct mount, mnt_hash); + while (!hlist_empty(&tmp_list)) { + child = hlist_entry(tmp_list.first, struct mount, mnt_hash); umount_tree(child, 0); } unlock_mount_hash(); @@ -338,8 +340,10 @@ static void __propagate_umount(struct mount *mnt) * umount the child only if the child has no * other children */ - if (child && list_empty(&child->mnt_mounts)) - list_move_tail(&child->mnt_hash, &mnt->mnt_hash); + if (child && list_empty(&child->mnt_mounts)) { + hlist_del_init_rcu(&child->mnt_hash); + hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); + } } } @@ -350,11 +354,11 @@ static void __propagate_umount(struct mount *mnt) * * vfsmount lock must be held for write */ -int propagate_umount(struct list_head *list) +int propagate_umount(struct hlist_head *list) { struct mount *mnt; - list_for_each_entry(mnt, list, mnt_hash) + hlist_for_each_entry(mnt, list, mnt_hash) __propagate_umount(mnt); return 0; } diff --git a/fs/pnode.h b/fs/pnode.h index 59e7eda1851..fc28a27fa89 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -36,8 +36,8 @@ static inline void set_mnt_shared(struct mount *mnt) void change_mnt_propagation(struct mount *, int); int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, - struct list_head *); -int propagate_umount(struct list_head *); + struct hlist_head *); +int propagate_umount(struct hlist_head *); int propagate_mount_busy(struct mount *, int); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); diff --git a/fs/proc/base.c b/fs/proc/base.c index 51507065263..b9760628e1f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1824,6 +1824,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path) if (rc) goto out_mmput; + rc = -ENOENT; down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { diff --git a/fs/proc/page.c b/fs/proc/page.c index 02174a61031..e647c55275d 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -121,9 +121,8 @@ u64 stable_page_flags(struct page *page) * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon * to make sure a given page is a thp, not a non-huge compound page. */ - else if (PageTransCompound(page) && - (PageLRU(compound_trans_head(page)) || - PageAnon(compound_trans_head(page)))) + else if (PageTransCompound(page) && (PageLRU(compound_head(page)) || + PageAnon(compound_head(page)))) u |= 1 << KPF_THP; /* diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 831d49a4111..cfc8dcc1604 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -581,9 +581,17 @@ int dquot_scan_active(struct super_block *sb, dqstats_inc(DQST_LOOKUPS); dqput(old_dquot); old_dquot = dquot; - ret = fn(dquot, priv); - if (ret < 0) - goto out; + /* + * ->release_dquot() can be racing with us. Our reference + * protects us from new calls to it so just wait for any + * outstanding call and recheck the DQ_ACTIVE_B after that. + */ + wait_on_dquot(dquot); + if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { + ret = fn(dquot, priv); + if (ret < 0) + goto out; + } spin_lock(&dq_list_lock); /* We are safe to continue now because our dquot could not * be moved out of the inuse list while we hold the reference */ diff --git a/fs/read_write.c b/fs/read_write.c index edc5746a902..31c6efa4318 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -264,10 +264,22 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) } EXPORT_SYMBOL(vfs_llseek); +static inline struct fd fdget_pos(int fd) +{ + return __to_fd(__fdget_pos(fd)); +} + +static inline void fdput_pos(struct fd f) +{ + if (f.flags & FDPUT_POS_UNLOCK) + mutex_unlock(&f.file->f_pos_lock); + fdput(f); +} + SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) { off_t retval; - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); if (!f.file) return -EBADF; @@ -278,7 +290,7 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) if (res != (loff_t)retval) retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ } - fdput(f); + fdput_pos(f); return retval; } @@ -295,7 +307,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, unsigned int, whence) { int retval; - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); loff_t offset; if (!f.file) @@ -315,7 +327,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, retval = 0; } out_putf: - fdput(f); + fdput_pos(f); return retval; } #endif @@ -498,7 +510,7 @@ static inline void file_pos_write(struct file *file, loff_t pos) SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -506,7 +518,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) ret = vfs_read(f.file, buf, count, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } return ret; } @@ -514,7 +526,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, size_t, count) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -522,7 +534,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, ret = vfs_write(f.file, buf, count, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } return ret; @@ -797,7 +809,7 @@ EXPORT_SYMBOL(vfs_writev); SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -805,7 +817,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, ret = vfs_readv(f.file, vec, vlen, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } if (ret > 0) @@ -817,7 +829,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -825,7 +837,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, ret = vfs_writev(f.file, vec, vlen, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } if (ret > 0) @@ -968,7 +980,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret; loff_t pos; @@ -978,13 +990,13 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, ret = compat_readv(f.file, vec, vlen, &pos); if (ret >= 0) f.file->f_pos = pos; - fdput(f); + fdput_pos(f); return ret; } -COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, - const struct compat_iovec __user *,vec, - unsigned long, vlen, loff_t, pos) +static long __compat_sys_preadv64(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos) { struct fd f; ssize_t ret; @@ -1001,12 +1013,22 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, return ret; } +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 +COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos) +{ + return __compat_sys_preadv64(fd, vec, vlen, pos); +} +#endif + COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; - return compat_sys_preadv64(fd, vec, vlen, pos); + + return __compat_sys_preadv64(fd, vec, vlen, pos); } static size_t compat_writev(struct file *file, @@ -1035,7 +1057,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, const struct compat_iovec __user *, vec, compat_ulong_t, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret; loff_t pos; @@ -1045,13 +1067,13 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, ret = compat_writev(f.file, vec, vlen, &pos); if (ret >= 0) f.file->f_pos = pos; - fdput(f); + fdput_pos(f); return ret; } -COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, - const struct compat_iovec __user *,vec, - unsigned long, vlen, loff_t, pos) +static long __compat_sys_pwritev64(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos) { struct fd f; ssize_t ret; @@ -1068,12 +1090,22 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, return ret; } +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 +COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos) +{ + return __compat_sys_pwritev64(fd, vec, vlen, pos); +} +#endif + COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; - return compat_sys_pwritev64(fd, vec, vlen, pos); + + return __compat_sys_pwritev64(fd, vec, vlen, pos); } #endif diff --git a/fs/sync.c b/fs/sync.c index e8ba024a055..b28d1dd10e8 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -27,11 +27,10 @@ * wait == 1 case since in that case write_inode() functions do * sync_dirty_buffer() and thus effectively write one block at a time. */ -static int __sync_filesystem(struct super_block *sb, int wait, - unsigned long start) +static int __sync_filesystem(struct super_block *sb, int wait) { if (wait) - sync_inodes_sb(sb, start); + sync_inodes_sb(sb); else writeback_inodes_sb(sb, WB_REASON_SYNC); @@ -48,7 +47,6 @@ static int __sync_filesystem(struct super_block *sb, int wait, int sync_filesystem(struct super_block *sb) { int ret; - unsigned long start = jiffies; /* * We need to be protected against the filesystem going from @@ -62,17 +60,17 @@ int sync_filesystem(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return 0; - ret = __sync_filesystem(sb, 0, start); + ret = __sync_filesystem(sb, 0); if (ret < 0) return ret; - return __sync_filesystem(sb, 1, start); + return __sync_filesystem(sb, 1); } EXPORT_SYMBOL_GPL(sync_filesystem); static void sync_inodes_one_sb(struct super_block *sb, void *arg) { if (!(sb->s_flags & MS_RDONLY)) - sync_inodes_sb(sb, *((unsigned long *)arg)); + sync_inodes_sb(sb); } static void sync_fs_one_sb(struct super_block *sb, void *arg) @@ -104,10 +102,9 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg) SYSCALL_DEFINE0(sync) { int nowait = 0, wait = 1; - unsigned long start = jiffies; wakeup_flusher_threads(0, WB_REASON_SYNC); - iterate_supers(sync_inodes_one_sb, &start); + iterate_supers(sync_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &wait); iterate_bdevs(fdatawrite_one_bdev, NULL); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 6211230814f..3eaf5c6622e 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -27,6 +27,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, { struct dentry *root; void *ns; + bool new_sb; if (!(flags & MS_KERNMOUNT)) { if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) @@ -37,8 +38,8 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, } ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); - root = kernfs_mount_ns(fs_type, flags, sysfs_root, ns); - if (IS_ERR(root)) + root = kernfs_mount_ns(fs_type, flags, sysfs_root, &new_sb, ns); + if (IS_ERR(root) || !new_sb) kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); return root; } diff --git a/fs/udf/file.c b/fs/udf/file.c index c02a27a19c6..1037637957c 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -144,6 +144,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, size_t count = iocb->ki_nbytes; struct udf_inode_info *iinfo = UDF_I(inode); + mutex_lock(&inode->i_mutex); down_write(&iinfo->i_data_sem); if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { if (file->f_flags & O_APPEND) @@ -156,6 +157,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, pos + count)) { err = udf_expand_file_adinicb(inode); if (err) { + mutex_unlock(&inode->i_mutex); udf_debug("udf_expand_adinicb: err=%d\n", err); return err; } @@ -169,9 +171,17 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } else up_write(&iinfo->i_data_sem); - retval = generic_file_aio_write(iocb, iov, nr_segs, ppos); - if (retval > 0) + retval = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + mutex_unlock(&inode->i_mutex); + + if (retval > 0) { + ssize_t err; + mark_inode_dirty(inode); + err = generic_write_sync(file, iocb->ki_pos - retval, retval); + if (err < 0) + retval = err; + } return retval; } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 062b7925bca..982ce05c87e 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -265,6 +265,7 @@ int udf_expand_file_adinicb(struct inode *inode) .nr_to_write = 1, }; + WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); if (!iinfo->i_lenAlloc) { if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index f35d5c953ff..9ddfb8190ca 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -705,7 +705,6 @@ xfs_setattr_size( { struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); - int mask = iattr->ia_valid; xfs_off_t oldsize, newsize; struct xfs_trans *tp; int error; @@ -726,8 +725,8 @@ xfs_setattr_size( ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(S_ISREG(ip->i_d.di_mode)); - ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| - ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); + ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| + ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); oldsize = inode->i_size; newsize = iattr->ia_size; @@ -736,7 +735,7 @@ xfs_setattr_size( * Short circuit the truncate case for zero length files. */ if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { - if (!(mask & (ATTR_CTIME|ATTR_MTIME))) + if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME))) return 0; /* @@ -824,10 +823,11 @@ xfs_setattr_size( * these flags set. For all other operations the VFS set these flags * explicitly if it wants a timestamp update. */ - if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { + if (newsize != oldsize && + !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { iattr->ia_ctime = iattr->ia_mtime = current_fs_time(inode->i_sb); - mask |= ATTR_CTIME | ATTR_MTIME; + iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; } /* @@ -863,9 +863,9 @@ xfs_setattr_size( xfs_inode_clear_eofblocks_tag(ip); } - if (mask & ATTR_MODE) + if (iattr->ia_valid & ATTR_MODE) xfs_setattr_mode(ip, iattr); - if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) + if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) xfs_setattr_time(ip, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index cdebd832c3d..4ef6fdbced7 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -205,16 +205,25 @@ xlog_cil_insert_format_items( /* * We 64-bit align the length of each iovec so that the start * of the next one is naturally aligned. We'll need to - * account for that slack space here. + * account for that slack space here. Then round nbytes up + * to 64-bit alignment so that the initial buffer alignment is + * easy to calculate and verify. */ nbytes += niovecs * sizeof(uint64_t); + nbytes = round_up(nbytes, sizeof(uint64_t)); /* grab the old item if it exists for reservation accounting */ old_lv = lip->li_lv; - /* calc buffer size */ - buf_size = sizeof(struct xfs_log_vec) + nbytes + - niovecs * sizeof(struct xfs_log_iovec); + /* + * The data buffer needs to start 64-bit aligned, so round up + * that space to ensure we can align it appropriately and not + * overrun the buffer. + */ + buf_size = nbytes + + round_up((sizeof(struct xfs_log_vec) + + niovecs * sizeof(struct xfs_log_iovec)), + sizeof(uint64_t)); /* compare to existing item size */ if (lip->li_lv && buf_size <= lip->li_lv->lv_size) { @@ -251,6 +260,8 @@ xlog_cil_insert_format_items( /* The allocated data region lies beyond the iovec region */ lv->lv_buf_len = 0; lv->lv_buf = (char *)lv + buf_size - nbytes; + ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t))); + lip->li_ops->iop_format(lip, lv); insert: ASSERT(lv->lv_buf_len <= nbytes); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 02df7b408a2..f96c05669a9 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -282,22 +282,29 @@ xfs_readsb( struct xfs_sb *sbp = &mp->m_sb; int error; int loud = !(flags & XFS_MFSI_QUIET); + const struct xfs_buf_ops *buf_ops; ASSERT(mp->m_sb_bp == NULL); ASSERT(mp->m_ddev_targp != NULL); /* + * For the initial read, we must guess at the sector + * size based on the block device. It's enough to + * get the sb_sectsize out of the superblock and + * then reread with the proper length. + * We don't verify it yet, because it may not be complete. + */ + sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); + buf_ops = NULL; + + /* * Allocate a (locked) buffer to hold the superblock. * This will be kept around at all times to optimize * access to the superblock. */ - sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); - reread: bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, - BTOBB(sector_size), 0, - loud ? &xfs_sb_buf_ops - : &xfs_sb_quiet_buf_ops); + BTOBB(sector_size), 0, buf_ops); if (!bp) { if (loud) xfs_warn(mp, "SB buffer read failed"); @@ -328,12 +335,13 @@ reread: } /* - * If device sector size is smaller than the superblock size, - * re-read the superblock so the buffer is correctly sized. + * Re-read the superblock so the buffer is correctly sized, + * and properly verified. */ - if (sector_size < sbp->sb_sectsize) { + if (buf_ops == NULL) { xfs_buf_relse(bp); sector_size = sbp->sb_sectsize; + buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops; goto reread; } diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index b7c9aea77f8..1e116794bb6 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c @@ -295,8 +295,7 @@ xfs_mount_validate_sb( sbp->sb_dblocks == 0 || sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { - XFS_CORRUPTION_ERROR("SB sanity check failed", - XFS_ERRLEVEL_LOW, mp, sbp); + xfs_notice(mp, "SB sanity check failed"); return XFS_ERROR(EFSCORRUPTED); } @@ -611,10 +610,10 @@ xfs_sb_read_verify( XFS_SB_VERSION_5) || dsb->sb_crc != 0)) { - if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize), + if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), offsetof(struct xfs_sb, sb_crc))) { /* Only fail bad secondaries on a known V5 filesystem */ - if (bp->b_bn != XFS_SB_DADDR && + if (bp->b_bn == XFS_SB_DADDR || xfs_sb_version_hascrc(&mp->m_sb)) { error = EFSCORRUPTED; goto out_error; @@ -625,7 +624,7 @@ xfs_sb_read_verify( out_error: if (error) { - if (error != EWRONGFS) + if (error == EFSCORRUPTED) XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, error); @@ -644,7 +643,6 @@ xfs_sb_quiet_read_verify( { struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); - if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) { /* XFS filesystem, verify noisily! */ xfs_sb_read_verify(bp); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f317488263d..d971f4932b5 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -913,7 +913,7 @@ xfs_flush_inodes( struct super_block *sb = mp->m_super; if (down_read_trylock(&sb->s_umount)) { - sync_inodes_sb(sb, jiffies); + sync_inodes_sb(sb); up_read(&sb->s_umount); } } |