diff options
Diffstat (limited to 'fs')
103 files changed, 1731 insertions, 1944 deletions
diff --git a/fs/9p/cache.h b/fs/9p/cache.h index 40cc54ced5d..2f967549109 100644 --- a/fs/9p/cache.h +++ b/fs/9p/cache.h @@ -101,6 +101,18 @@ static inline void v9fs_fscache_wait_on_page_write(struct inode *inode, #else /* CONFIG_9P_FSCACHE */ +static inline void v9fs_cache_inode_get_cookie(struct inode *inode) +{ +} + +static inline void v9fs_cache_inode_put_cookie(struct inode *inode) +{ +} + +static inline void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *file) +{ +} + static inline int v9fs_fscache_release_page(struct page *page, gfp_t gfp) { return 1; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index aa5ecf479a5..a0df3e73c2b 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -105,10 +105,8 @@ int v9fs_file_open(struct inode *inode, struct file *file) v9inode->writeback_fid = (void *) fid; } mutex_unlock(&v9inode->v_mutex); -#ifdef CONFIG_9P_FSCACHE if (v9ses->cache) v9fs_cache_inode_set_cookie(inode, file); -#endif return 0; out_error: p9_client_clunk(file->private_data); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 94de6d1482e..4e65aa90334 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -448,9 +448,7 @@ void v9fs_evict_inode(struct inode *inode) clear_inode(inode); filemap_fdatawrite(inode->i_mapping); -#ifdef CONFIG_9P_FSCACHE v9fs_cache_inode_put_cookie(inode); -#endif /* clunk the fid stashed in writeback_fid */ if (v9inode->writeback_fid) { p9_client_clunk(v9inode->writeback_fid); @@ -531,9 +529,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, goto error; v9fs_stat2inode(st, inode, sb); -#ifdef CONFIG_9P_FSCACHE v9fs_cache_inode_get_cookie(inode); -#endif unlock_new_inode(inode); return inode; error: @@ -905,10 +901,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, goto error; file->private_data = fid; -#ifdef CONFIG_9P_FSCACHE if (v9ses->cache) v9fs_cache_inode_set_cookie(dentry->d_inode, file); -#endif *opened |= FILE_CREATED; out: diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index a7c481402c4..4c10edec26a 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -141,9 +141,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, goto error; v9fs_stat2inode_dotl(st, inode); -#ifdef CONFIG_9P_FSCACHE v9fs_cache_inode_get_cookie(inode); -#endif retval = v9fs_get_acl(inode, fid); if (retval) goto error; @@ -355,10 +353,8 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (err) goto err_clunk_old_fid; file->private_data = ofid; -#ifdef CONFIG_9P_FSCACHE if (v9ses->cache) v9fs_cache_inode_set_cookie(inode, file); -#endif *opened |= FILE_CREATED; out: v9fs_put_acl(dacl, pacl); diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 585adafb0cc..c770337c4b4 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -43,9 +43,12 @@ struct adfs_dir_ops; * ADFS file system superblock data in memory */ struct adfs_sb_info { - struct adfs_discmap *s_map; /* bh list containing map */ - struct adfs_dir_ops *s_dir; /* directory operations */ - + union { struct { + struct adfs_discmap *s_map; /* bh list containing map */ + struct adfs_dir_ops *s_dir; /* directory operations */ + }; + struct rcu_head rcu; /* used only at shutdown time */ + }; kuid_t s_uid; /* owner uid */ kgid_t s_gid; /* owner gid */ umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 0ff4bae2c2a..7b3003cb6f1 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -123,8 +123,7 @@ static void adfs_put_super(struct super_block *sb) for (i = 0; i < asb->s_map_size; i++) brelse(asb->s_map[i].dm_bh); kfree(asb->s_map); - kfree(asb); - sb->s_fs_info = NULL; + kfree_rcu(asb, rcu); } static int adfs_show_options(struct seq_file *seq, struct dentry *root) @@ -36,10 +36,10 @@ #include <linux/eventfd.h> #include <linux/blkdev.h> #include <linux/compat.h> -#include <linux/anon_inodes.h> #include <linux/migrate.h> #include <linux/ramfs.h> #include <linux/percpu-refcount.h> +#include <linux/mount.h> #include <asm/kmap_types.h> #include <asm/uaccess.h> @@ -152,12 +152,67 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio request static struct kmem_cache *kiocb_cachep; static struct kmem_cache *kioctx_cachep; +static struct vfsmount *aio_mnt; + +static const struct file_operations aio_ring_fops; +static const struct address_space_operations aio_ctx_aops; + +static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) +{ + struct qstr this = QSTR_INIT("[aio]", 5); + struct file *file; + struct path path; + struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + inode->i_mapping->a_ops = &aio_ctx_aops; + inode->i_mapping->private_data = ctx; + inode->i_size = PAGE_SIZE * nr_pages; + + path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); + if (!path.dentry) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + path.mnt = mntget(aio_mnt); + + d_instantiate(path.dentry, inode); + file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops); + if (IS_ERR(file)) { + path_put(&path); + return file; + } + + file->f_flags = O_RDWR; + file->private_data = ctx; + return file; +} + +static struct dentry *aio_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + static const struct dentry_operations ops = { + .d_dname = simple_dname, + }; + return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1); +} + /* aio_setup * Creates the slab caches used by the aio routines, panic on * failure as this is done early during the boot sequence. */ static int __init aio_setup(void) { + static struct file_system_type aio_fs = { + .name = "aio", + .mount = aio_mount, + .kill_sb = kill_anon_super, + }; + aio_mnt = kern_mount(&aio_fs); + if (IS_ERR(aio_mnt)) + panic("Failed to create aio fs mount."); + kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); @@ -283,16 +338,12 @@ static int aio_setup_ring(struct kioctx *ctx) if (nr_pages < 0) return -EINVAL; - file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR); + file = aio_private_file(ctx, nr_pages); if (IS_ERR(file)) { ctx->aio_ring_file = NULL; return -EAGAIN; } - file->f_inode->i_mapping->a_ops = &aio_ctx_aops; - file->f_inode->i_mapping->private_data = ctx; - file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages; - for (i = 0; i < nr_pages; i++) { struct page *page; page = find_or_create_page(file->f_inode->i_mapping, diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 85c96184995..24084732b1d 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -24,7 +24,6 @@ static struct vfsmount *anon_inode_mnt __read_mostly; static struct inode *anon_inode_inode; -static const struct file_operations anon_inode_fops; /* * anon_inodefs_dname() is called from d_path(). @@ -39,51 +38,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { .d_dname = anon_inodefs_dname, }; -/* - * nop .set_page_dirty method so that people can use .page_mkwrite on - * anon inodes. - */ -static int anon_set_page_dirty(struct page *page) -{ - return 0; -}; - -static const struct address_space_operations anon_aops = { - .set_page_dirty = anon_set_page_dirty, -}; - -/* - * A single inode exists for all anon_inode files. Contrary to pipes, - * anon_inode inodes have no associated per-instance data, so we need - * only allocate one of them. - */ -static struct inode *anon_inode_mkinode(struct super_block *s) -{ - struct inode *inode = new_inode_pseudo(s); - - if (!inode) - return ERR_PTR(-ENOMEM); - - inode->i_ino = get_next_ino(); - inode->i_fop = &anon_inode_fops; - - inode->i_mapping->a_ops = &anon_aops; - - /* - * Mark the inode dirty from the very beginning, - * that way it will never be moved to the dirty - * list because mark_inode_dirty() will think - * that it already _is_ on the dirty list. - */ - inode->i_state = I_DIRTY; - inode->i_mode = S_IRUSR | S_IWUSR; - inode->i_uid = current_fsuid(); - inode->i_gid = current_fsgid(); - inode->i_flags |= S_PRIVATE; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - return inode; -} - static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -92,7 +46,7 @@ static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC); if (!IS_ERR(root)) { struct super_block *s = root->d_sb; - anon_inode_inode = anon_inode_mkinode(s); + anon_inode_inode = alloc_anon_inode(s); if (IS_ERR(anon_inode_inode)) { dput(root); deactivate_locked_super(s); @@ -109,72 +63,6 @@ static struct file_system_type anon_inode_fs_type = { }; /** - * anon_inode_getfile_private - creates a new file instance by hooking it up to an - * anonymous inode, and a dentry that describe the "class" - * of the file - * - * @name: [in] name of the "class" of the new file - * @fops: [in] file operations for the new file - * @priv: [in] private data for the new file (will be file's private_data) - * @flags: [in] flags - * - * - * Similar to anon_inode_getfile, but each file holds a single inode. - * - */ -struct file *anon_inode_getfile_private(const char *name, - const struct file_operations *fops, - void *priv, int flags) -{ - struct qstr this; - struct path path; - struct file *file; - struct inode *inode; - - if (fops->owner && !try_module_get(fops->owner)) - return ERR_PTR(-ENOENT); - - inode = anon_inode_mkinode(anon_inode_mnt->mnt_sb); - if (IS_ERR(inode)) { - file = ERR_PTR(-ENOMEM); - goto err_module; - } - - /* - * Link the inode to a directory entry by creating a unique name - * using the inode sequence number. - */ - file = ERR_PTR(-ENOMEM); - this.name = name; - this.len = strlen(name); - this.hash = 0; - path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this); - if (!path.dentry) - goto err_module; - - path.mnt = mntget(anon_inode_mnt); - - d_instantiate(path.dentry, inode); - - file = alloc_file(&path, OPEN_FMODE(flags), fops); - if (IS_ERR(file)) - goto err_dput; - - file->f_mapping = inode->i_mapping; - file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); - file->private_data = priv; - - return file; - -err_dput: - path_put(&path); -err_module: - module_put(fops->owner); - return file; -} -EXPORT_SYMBOL_GPL(anon_inode_getfile_private); - -/** * anon_inode_getfile - creates a new file instance by hooking it up to an * anonymous inode, and a dentry that describe the "class" * of the file diff --git a/fs/attr.c b/fs/attr.c index 1449adb14ef..267968d9467 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -167,7 +167,27 @@ void setattr_copy(struct inode *inode, const struct iattr *attr) } EXPORT_SYMBOL(setattr_copy); -int notify_change(struct dentry * dentry, struct iattr * attr) +/** + * notify_change - modify attributes of a filesytem object + * @dentry: object affected + * @iattr: new attributes + * @delegated_inode: returns inode, if the inode is delegated + * + * The caller must hold the i_mutex on the affected object. + * + * If notify_change discovers a delegation in need of breaking, + * it will return -EWOULDBLOCK and return a reference to the inode in + * delegated_inode. The caller should then break the delegation and + * retry. Because breaking a delegation may take a long time, the + * caller should drop the i_mutex before doing so. + * + * Alternatively, a caller may pass NULL for delegated_inode. This may + * be appropriate for callers that expect the underlying filesystem not + * to be NFS exported. Also, passing NULL is fine for callers holding + * the file open for write, as there can be no conflicting delegation in + * that case. + */ +int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; umode_t mode = inode->i_mode; @@ -243,6 +263,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr) error = security_inode_setattr(dentry, attr); if (error) return error; + error = try_break_deleg(inode, delegated_inode); + if (error) + return error; if (inode->i_op->setattr) error = inode->i_op->setattr(dentry, attr); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 3f1128b37e4..4218e26df91 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -122,6 +122,7 @@ struct autofs_sb_info { spinlock_t lookup_lock; struct list_head active_list; struct list_head expiring_list; + struct rcu_head rcu; }; static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb) @@ -271,7 +272,7 @@ void autofs4_clean_ino(struct autofs_info *); static inline int autofs_prepare_pipe(struct file *pipe) { - if (!pipe->f_op || !pipe->f_op->write) + if (!pipe->f_op->write) return -EINVAL; if (!S_ISFIFO(file_inode(pipe)->i_mode)) return -EINVAL; diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 0f00da329e7..1818ce7f5a0 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -658,12 +658,6 @@ static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __use goto out; } - if (!fp->f_op) { - err = -ENOTTY; - fput(fp); - goto out; - } - sbi = autofs_dev_ioctl_sbi(fp); if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) { err = -EINVAL; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index b104726e2d0..3b9cc9b973c 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -56,18 +56,13 @@ void autofs4_kill_sb(struct super_block *sb) * just call kill_anon_super when we are called from * deactivate_super. */ - if (!sbi) - goto out_kill_sb; - - /* Free wait queues, close pipe */ - autofs4_catatonic_mode(sbi); - - sb->s_fs_info = NULL; - kfree(sbi); + if (sbi) /* Free wait queues, close pipe */ + autofs4_catatonic_mode(sbi); -out_kill_sb: DPRINTK("shutting down"); kill_litter_super(sb); + if (sbi) + kfree_rcu(sbi, rcu); } static int autofs4_show_options(struct seq_file *m, struct dentry *root) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index e9c75e20db3..daa15d6ba45 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -42,7 +42,7 @@ static void befs_destroy_inode(struct inode *inode); static int befs_init_inodecache(void); static void befs_destroy_inodecache(void); static void *befs_follow_link(struct dentry *, struct nameidata *); -static void befs_put_link(struct dentry *, struct nameidata *, void *); +static void *befs_fast_follow_link(struct dentry *, struct nameidata *); static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, char **out, int *out_len); static int befs_nls2utf(struct super_block *sb, const char *in, int in_len, @@ -79,10 +79,15 @@ static const struct address_space_operations befs_aops = { .bmap = befs_bmap, }; +static const struct inode_operations befs_fast_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = befs_fast_follow_link, +}; + static const struct inode_operations befs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = befs_follow_link, - .put_link = befs_put_link, + .put_link = kfree_put_link, }; /* @@ -411,7 +416,10 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) inode->i_op = &befs_dir_inode_operations; inode->i_fop = &befs_dir_operations; } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &befs_symlink_inode_operations; + if (befs_ino->i_flags & BEFS_LONG_SYMLINK) + inode->i_op = &befs_symlink_inode_operations; + else + inode->i_op = &befs_fast_symlink_inode_operations; } else { befs_error(sb, "Inode %lu is not a regular file, " "directory or symlink. THAT IS WRONG! BeFS has no " @@ -477,47 +485,40 @@ befs_destroy_inodecache(void) static void * befs_follow_link(struct dentry *dentry, struct nameidata *nd) { + struct super_block *sb = dentry->d_sb; befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); + befs_data_stream *data = &befs_ino->i_data.ds; + befs_off_t len = data->size; char *link; - if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { - struct super_block *sb = dentry->d_sb; - befs_data_stream *data = &befs_ino->i_data.ds; - befs_off_t len = data->size; + if (len == 0) { + befs_error(sb, "Long symlink with illegal length"); + link = ERR_PTR(-EIO); + } else { + befs_debug(sb, "Follow long symlink"); - if (len == 0) { - befs_error(sb, "Long symlink with illegal length"); + link = kmalloc(len, GFP_NOFS); + if (!link) { + link = ERR_PTR(-ENOMEM); + } else if (befs_read_lsymlink(sb, data, link, len) != len) { + kfree(link); + befs_error(sb, "Failed to read entire long symlink"); link = ERR_PTR(-EIO); } else { - befs_debug(sb, "Follow long symlink"); - - link = kmalloc(len, GFP_NOFS); - if (!link) { - link = ERR_PTR(-ENOMEM); - } else if (befs_read_lsymlink(sb, data, link, len) != len) { - kfree(link); - befs_error(sb, "Failed to read entire long symlink"); - link = ERR_PTR(-EIO); - } else { - link[len - 1] = '\0'; - } + link[len - 1] = '\0'; } - } else { - link = befs_ino->i_data.symlink; } - nd_set_link(nd, link); return NULL; } -static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) + +static void * +befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd) { befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); - if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { - char *link = nd_get_link(nd); - if (!IS_ERR(link)) - kfree(link); - } + nd_set_link(nd, befs_ino->i_data.symlink); + return NULL; } /* diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 89dec7f789a..ca0ba15a730 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -45,7 +45,6 @@ static int load_aout_library(struct file*); */ static int aout_core_dump(struct coredump_params *cprm) { - struct file *file = cprm->file; mm_segment_t fs; int has_dumped = 0; void __user *dump_start; @@ -85,10 +84,10 @@ static int aout_core_dump(struct coredump_params *cprm) set_fs(KERNEL_DS); /* struct user */ - if (!dump_write(file, &dump, sizeof(dump))) + if (!dump_emit(cprm, &dump, sizeof(dump))) goto end_coredump; /* Now dump all of the user data. Include malloced stuff as well */ - if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump))) + if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump))) goto end_coredump; /* now we start writing out the user space info */ set_fs(USER_DS); @@ -96,14 +95,14 @@ static int aout_core_dump(struct coredump_params *cprm) if (dump.u_dsize != 0) { dump_start = START_DATA(dump); dump_size = dump.u_dsize << PAGE_SHIFT; - if (!dump_write(file, dump_start, dump_size)) + if (!dump_emit(cprm, dump_start, dump_size)) goto end_coredump; } /* Now prepare to dump the stack area */ if (dump.u_ssize != 0) { dump_start = START_STACK(dump); dump_size = dump.u_ssize << PAGE_SHIFT; - if (!dump_write(file, dump_start, dump_size)) + if (!dump_emit(cprm, dump_start, dump_size)) goto end_coredump; } end_coredump: @@ -221,7 +220,7 @@ static int load_aout_binary(struct linux_binprm * bprm) * Requires a mmap handler. This prevents people from using a.out * as part of an exploit attack against /proc-related vulnerabilities. */ - if (!bprm->file->f_op || !bprm->file->f_op->mmap) + if (!bprm->file->f_op->mmap) return -ENOEXEC; fd_offset = N_TXTOFF(ex); @@ -374,7 +373,7 @@ static int load_aout_library(struct file *file) * Requires a mmap handler. This prevents people from using a.out * as part of an exploit attack against /proc-related vulnerabilities. */ - if (!file->f_op || !file->f_op->mmap) + if (!file->f_op->mmap) goto out; if (N_FLAGS(ex)) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 4c94a79991b..571a4232690 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -406,7 +406,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, goto out; if (!elf_check_arch(interp_elf_ex)) goto out; - if (!interpreter->f_op || !interpreter->f_op->mmap) + if (!interpreter->f_op->mmap) goto out; /* @@ -607,7 +607,7 @@ static int load_elf_binary(struct linux_binprm *bprm) goto out; if (!elf_check_arch(&loc->elf_ex)) goto out; - if (!bprm->file->f_op || !bprm->file->f_op->mmap) + if (!bprm->file->f_op->mmap) goto out; /* Now read in all of the header information */ @@ -1028,7 +1028,7 @@ static int load_elf_library(struct file *file) /* First of all, some simple consistency checks */ if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || - !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap) + !elf_check_arch(&elf_ex) || !file->f_op->mmap) goto out; /* Now read in all of the header information */ @@ -1225,35 +1225,17 @@ static int notesize(struct memelfnote *en) return sz; } -#define DUMP_WRITE(addr, nr, foffset) \ - do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0) - -static int alignfile(struct file *file, loff_t *foffset) -{ - static const char buf[4] = { 0, }; - DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset); - return 1; -} - -static int writenote(struct memelfnote *men, struct file *file, - loff_t *foffset) +static int writenote(struct memelfnote *men, struct coredump_params *cprm) { struct elf_note en; en.n_namesz = strlen(men->name) + 1; en.n_descsz = men->datasz; en.n_type = men->type; - DUMP_WRITE(&en, sizeof(en), foffset); - DUMP_WRITE(men->name, en.n_namesz, foffset); - if (!alignfile(file, foffset)) - return 0; - DUMP_WRITE(men->data, men->datasz, foffset); - if (!alignfile(file, foffset)) - return 0; - - return 1; + return dump_emit(cprm, &en, sizeof(en)) && + dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) && + dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4); } -#undef DUMP_WRITE static void fill_elf_header(struct elfhdr *elf, int segs, u16 machine, u32 flags) @@ -1392,7 +1374,7 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm) } static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, - siginfo_t *siginfo) + const siginfo_t *siginfo) { mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); @@ -1599,7 +1581,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, - siginfo_t *siginfo, struct pt_regs *regs) + const siginfo_t *siginfo, struct pt_regs *regs) { struct task_struct *dump_task = current; const struct user_regset_view *view = task_user_regset_view(dump_task); @@ -1702,7 +1684,7 @@ static size_t get_note_info_size(struct elf_note_info *info) * process-wide notes are interleaved after the first thread-specific note. */ static int write_note_info(struct elf_note_info *info, - struct file *file, loff_t *foffset) + struct coredump_params *cprm) { bool first = 1; struct elf_thread_core_info *t = info->thread; @@ -1710,22 +1692,22 @@ static int write_note_info(struct elf_note_info *info, do { int i; - if (!writenote(&t->notes[0], file, foffset)) + if (!writenote(&t->notes[0], cprm)) return 0; - if (first && !writenote(&info->psinfo, file, foffset)) + if (first && !writenote(&info->psinfo, cprm)) return 0; - if (first && !writenote(&info->signote, file, foffset)) + if (first && !writenote(&info->signote, cprm)) return 0; - if (first && !writenote(&info->auxv, file, foffset)) + if (first && !writenote(&info->auxv, cprm)) return 0; if (first && info->files.data && - !writenote(&info->files, file, foffset)) + !writenote(&info->files, cprm)) return 0; for (i = 1; i < info->thread_notes; ++i) if (t->notes[i].data && - !writenote(&t->notes[i], file, foffset)) + !writenote(&t->notes[i], cprm)) return 0; first = 0; @@ -1848,34 +1830,31 @@ static int elf_note_info_init(struct elf_note_info *info) static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, - siginfo_t *siginfo, struct pt_regs *regs) + const siginfo_t *siginfo, struct pt_regs *regs) { struct list_head *t; + struct core_thread *ct; + struct elf_thread_status *ets; if (!elf_note_info_init(info)) return 0; - if (siginfo->si_signo) { - struct core_thread *ct; - struct elf_thread_status *ets; - - for (ct = current->mm->core_state->dumper.next; - ct; ct = ct->next) { - ets = kzalloc(sizeof(*ets), GFP_KERNEL); - if (!ets) - return 0; + for (ct = current->mm->core_state->dumper.next; + ct; ct = ct->next) { + ets = kzalloc(sizeof(*ets), GFP_KERNEL); + if (!ets) + return 0; - ets->thread = ct->task; - list_add(&ets->list, &info->thread_list); - } + ets->thread = ct->task; + list_add(&ets->list, &info->thread_list); + } - list_for_each(t, &info->thread_list) { - int sz; + list_for_each(t, &info->thread_list) { + int sz; - ets = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(siginfo->si_signo, ets); - info->thread_status_size += sz; - } + ets = list_entry(t, struct elf_thread_status, list); + sz = elf_dump_thread_status(siginfo->si_signo, ets); + info->thread_status_size += sz; } /* now collect the dump for the current */ memset(info->prstatus, 0, sizeof(*info->prstatus)); @@ -1935,13 +1914,13 @@ static size_t get_note_info_size(struct elf_note_info *info) } static int write_note_info(struct elf_note_info *info, - struct file *file, loff_t *foffset) + struct coredump_params *cprm) { int i; struct list_head *t; for (i = 0; i < info->numnote; i++) - if (!writenote(info->notes + i, file, foffset)) + if (!writenote(info->notes + i, cprm)) return 0; /* write out the thread status notes section */ @@ -1950,7 +1929,7 @@ static int write_note_info(struct elf_note_info *info, list_entry(t, struct elf_thread_status, list); for (i = 0; i < tmp->num_notes; i++) - if (!writenote(&tmp->notes[i], file, foffset)) + if (!writenote(&tmp->notes[i], cprm)) return 0; } @@ -2046,10 +2025,9 @@ static int elf_core_dump(struct coredump_params *cprm) int has_dumped = 0; mm_segment_t fs; int segs; - size_t size = 0; struct vm_area_struct *vma, *gate_vma; struct elfhdr *elf = NULL; - loff_t offset = 0, dataoff, foffset; + loff_t offset = 0, dataoff; struct elf_note_info info = { }; struct elf_phdr *phdr4note = NULL; struct elf_shdr *shdr4extnum = NULL; @@ -2105,7 +2083,6 @@ static int elf_core_dump(struct coredump_params *cprm) offset += sizeof(*elf); /* Elf header */ offset += segs * sizeof(struct elf_phdr); /* Program headers */ - foffset = offset; /* Write notes phdr entry */ { @@ -2136,13 +2113,10 @@ static int elf_core_dump(struct coredump_params *cprm) offset = dataoff; - size += sizeof(*elf); - if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) + if (!dump_emit(cprm, elf, sizeof(*elf))) goto end_coredump; - size += sizeof(*phdr4note); - if (size > cprm->limit - || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note))) + if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note))) goto end_coredump; /* Write program headers for segments dump */ @@ -2164,24 +2138,22 @@ static int elf_core_dump(struct coredump_params *cprm) phdr.p_flags |= PF_X; phdr.p_align = ELF_EXEC_PAGESIZE; - size += sizeof(phdr); - if (size > cprm->limit - || !dump_write(cprm->file, &phdr, sizeof(phdr))) + if (!dump_emit(cprm, &phdr, sizeof(phdr))) goto end_coredump; } - if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) + if (!elf_core_write_extra_phdrs(cprm, offset)) goto end_coredump; /* write out the notes section */ - if (!write_note_info(&info, cprm->file, &foffset)) + if (!write_note_info(&info, cprm)) goto end_coredump; - if (elf_coredump_extra_notes_write(cprm->file, &foffset)) + if (elf_coredump_extra_notes_write(cprm)) goto end_coredump; /* Align to page */ - if (!dump_seek(cprm->file, dataoff - foffset)) + if (!dump_skip(cprm, dataoff - cprm->written)) goto end_coredump; for (vma = first_vma(current, gate_vma); vma != NULL; @@ -2198,26 +2170,21 @@ static int elf_core_dump(struct coredump_params *cprm) page = get_dump_page(addr); if (page) { void *kaddr = kmap(page); - stop = ((size += PAGE_SIZE) > cprm->limit) || - !dump_write(cprm->file, kaddr, - PAGE_SIZE); + stop = !dump_emit(cprm, kaddr, PAGE_SIZE); kunmap(page); page_cache_release(page); } else - stop = !dump_seek(cprm->file, PAGE_SIZE); + stop = !dump_skip(cprm, PAGE_SIZE); if (stop) goto end_coredump; } } - if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) + if (!elf_core_write_extra_data(cprm)) goto end_coredump; if (e_phnum == PN_XNUM) { - size += sizeof(*shdr4extnum); - if (size > cprm->limit - || !dump_write(cprm->file, shdr4extnum, - sizeof(*shdr4extnum))) + if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum))) goto end_coredump; } diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index c166f325a18..fe2a643ee00 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -111,7 +111,7 @@ static int is_elf_fdpic(struct elfhdr *hdr, struct file *file) return 0; if (!elf_check_arch(hdr) || !elf_check_fdpic(hdr)) return 0; - if (!file->f_op || !file->f_op->mmap) + if (!file->f_op->mmap) return 0; return 1; } @@ -1267,35 +1267,17 @@ static int notesize(struct memelfnote *en) /* #define DEBUG */ -#define DUMP_WRITE(addr, nr, foffset) \ - do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0) - -static int alignfile(struct file *file, loff_t *foffset) -{ - static const char buf[4] = { 0, }; - DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset); - return 1; -} - -static int writenote(struct memelfnote *men, struct file *file, - loff_t *foffset) +static int writenote(struct memelfnote *men, struct coredump_params *cprm) { struct elf_note en; en.n_namesz = strlen(men->name) + 1; en.n_descsz = men->datasz; en.n_type = men->type; - DUMP_WRITE(&en, sizeof(en), foffset); - DUMP_WRITE(men->name, en.n_namesz, foffset); - if (!alignfile(file, foffset)) - return 0; - DUMP_WRITE(men->data, men->datasz, foffset); - if (!alignfile(file, foffset)) - return 0; - - return 1; + return dump_emit(cprm, &en, sizeof(en)) && + dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) && + dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4); } -#undef DUMP_WRITE static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) { @@ -1500,66 +1482,40 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, /* * dump the segments for an MMU process */ -#ifdef CONFIG_MMU -static int elf_fdpic_dump_segments(struct file *file, size_t *size, - unsigned long *limit, unsigned long mm_flags) +static bool elf_fdpic_dump_segments(struct coredump_params *cprm) { struct vm_area_struct *vma; - int err = 0; for (vma = current->mm->mmap; vma; vma = vma->vm_next) { unsigned long addr; - if (!maydump(vma, mm_flags)) + if (!maydump(vma, cprm->mm_flags)) continue; +#ifdef CONFIG_MMU for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { + bool res; struct page *page = get_dump_page(addr); if (page) { void *kaddr = kmap(page); - *size += PAGE_SIZE; - if (*size > *limit) - err = -EFBIG; - else if (!dump_write(file, kaddr, PAGE_SIZE)) - err = -EIO; + res = dump_emit(cprm, kaddr, PAGE_SIZE); kunmap(page); page_cache_release(page); - } else if (!dump_seek(file, PAGE_SIZE)) - err = -EFBIG; - if (err) - goto out; + } else { + res = dump_skip(cprm, PAGE_SIZE); + } + if (!res) + return false; } - } -out: - return err; -} -#endif - -/* - * dump the segments for a NOMMU process - */ -#ifndef CONFIG_MMU -static int elf_fdpic_dump_segments(struct file *file, size_t *size, - unsigned long *limit, unsigned long mm_flags) -{ - struct vm_area_struct *vma; - - for (vma = current->mm->mmap; vma; vma = vma->vm_next) { - if (!maydump(vma, mm_flags)) - continue; - - if ((*size += PAGE_SIZE) > *limit) - return -EFBIG; - - if (!dump_write(file, (void *) vma->vm_start, +#else + if (!dump_emit(cprm, (void *) vma->vm_start, vma->vm_end - vma->vm_start)) - return -EIO; + return false; +#endif } - - return 0; + return true; } -#endif static size_t elf_core_vma_data_size(unsigned long mm_flags) { @@ -1585,11 +1541,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) int has_dumped = 0; mm_segment_t fs; int segs; - size_t size = 0; int i; struct vm_area_struct *vma; struct elfhdr *elf = NULL; - loff_t offset = 0, dataoff, foffset; + loff_t offset = 0, dataoff; int numnote; struct memelfnote *notes = NULL; struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ @@ -1606,6 +1561,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) struct elf_shdr *shdr4extnum = NULL; Elf_Half e_phnum; elf_addr_t e_shoff; + struct core_thread *ct; + struct elf_thread_status *tmp; /* * We no longer stop all VM operations. @@ -1641,28 +1598,23 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto cleanup; #endif - if (cprm->siginfo->si_signo) { - struct core_thread *ct; - struct elf_thread_status *tmp; - - for (ct = current->mm->core_state->dumper.next; - ct; ct = ct->next) { - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - goto cleanup; + for (ct = current->mm->core_state->dumper.next; + ct; ct = ct->next) { + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + goto cleanup; - tmp->thread = ct->task; - list_add(&tmp->list, &thread_list); - } + tmp->thread = ct->task; + list_add(&tmp->list, &thread_list); + } - list_for_each(t, &thread_list) { - struct elf_thread_status *tmp; - int sz; + list_for_each(t, &thread_list) { + struct elf_thread_status *tmp; + int sz; - tmp = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp); - thread_status_size += sz; - } + tmp = list_entry(t, struct elf_thread_status, list); + sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp); + thread_status_size += sz; } /* now collect the dump for the current */ @@ -1720,7 +1672,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) offset += sizeof(*elf); /* Elf header */ offset += segs * sizeof(struct elf_phdr); /* Program headers */ - foffset = offset; /* Write notes phdr entry */ { @@ -1755,13 +1706,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) offset = dataoff; - size += sizeof(*elf); - if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) + if (!dump_emit(cprm, elf, sizeof(*elf))) goto end_coredump; - size += sizeof(*phdr4note); - if (size > cprm->limit - || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note))) + if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note))) goto end_coredump; /* write program headers for segments dump */ @@ -1785,18 +1733,16 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) phdr.p_flags |= PF_X; phdr.p_align = ELF_EXEC_PAGESIZE; - size += sizeof(phdr); - if (size > cprm->limit - || !dump_write(cprm->file, &phdr, sizeof(phdr))) + if (!dump_emit(cprm, &phdr, sizeof(phdr))) goto end_coredump; } - if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) + if (!elf_core_write_extra_phdrs(cprm, offset)) goto end_coredump; /* write out the notes section */ for (i = 0; i < numnote; i++) - if (!writenote(notes + i, cprm->file, &foffset)) + if (!writenote(notes + i, cprm)) goto end_coredump; /* write out the thread status notes section */ @@ -1805,25 +1751,21 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) list_entry(t, struct elf_thread_status, list); for (i = 0; i < tmp->num_notes; i++) - if (!writenote(&tmp->notes[i], cprm->file, &foffset)) + if (!writenote(&tmp->notes[i], cprm)) goto end_coredump; } - if (!dump_seek(cprm->file, dataoff - foffset)) + if (!dump_skip(cprm, dataoff - cprm->written)) goto end_coredump; - if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit, - cprm->mm_flags) < 0) + if (!elf_fdpic_dump_segments(cprm)) goto end_coredump; - if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) + if (!elf_core_write_extra_data(cprm)) goto end_coredump; if (e_phnum == PN_XNUM) { - size += sizeof(*shdr4extnum); - if (size > cprm->limit - || !dump_write(cprm->file, shdr4extnum, - sizeof(*shdr4extnum))) + if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum))) goto end_coredump; } diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index 037a3e2b045..f37b08cea1f 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -38,7 +38,7 @@ static int load_em86(struct linux_binprm *bprm) /* First of all, some simple consistency checks */ if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) || (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) || - (!bprm->file->f_op || !bprm->file->f_op->mmap)) { + !bprm->file->f_op->mmap) { return -ENOEXEC; } diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 00baf141998..57e17fe6121 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -449,14 +449,14 @@ static int cachefiles_attr_changed(struct fscache_object *_object) _debug("discard tail %llx", oi_size); newattrs.ia_valid = ATTR_SIZE; newattrs.ia_size = oi_size & PAGE_MASK; - ret = notify_change(object->backer, &newattrs); + ret = notify_change(object->backer, &newattrs, NULL); if (ret < 0) goto truncate_failed; } newattrs.ia_valid = ATTR_SIZE; newattrs.ia_size = ni_size; - ret = notify_change(object->backer, &newattrs); + ret = notify_change(object->backer, &newattrs, NULL); truncate_failed: mutex_unlock(&object->backer->d_inode->i_mutex); diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index f4a08d7fa2f..ca65f39dc8d 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -294,7 +294,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, if (ret < 0) { cachefiles_io_error(cache, "Unlink security error"); } else { - ret = vfs_unlink(dir->d_inode, rep); + ret = vfs_unlink(dir->d_inode, rep, NULL); if (preemptive) cachefiles_mark_object_buried(cache, rep); @@ -396,7 +396,7 @@ try_again: cachefiles_io_error(cache, "Rename security error %d", ret); } else { ret = vfs_rename(dir->d_inode, rep, - cache->graveyard->d_inode, grave); + cache->graveyard->d_inode, grave, NULL); if (ret != 0 && ret != -ENOMEM) cachefiles_io_error(cache, "Rename failed with error %d", ret); diff --git a/fs/char_dev.c b/fs/char_dev.c index afc2bb69178..94b5f60076d 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -368,6 +368,7 @@ void cdev_put(struct cdev *p) */ static int chrdev_open(struct inode *inode, struct file *filp) { + const struct file_operations *fops; struct cdev *p; struct cdev *new = NULL; int ret = 0; @@ -400,10 +401,11 @@ static int chrdev_open(struct inode *inode, struct file *filp) return ret; ret = -ENXIO; - filp->f_op = fops_get(p->ops); - if (!filp->f_op) + fops = fops_get(p->ops); + if (!fops) goto out_cdev_put; + replace_fops(filp, fops); if (filp->f_op->open) { ret = filp->f_op->open(inode, filp); if (ret) diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 37e4a72a7d1..9409fa10bd5 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -65,5 +65,6 @@ struct cifs_sb_info { char *mountdata; /* options received at mount time or via DFS refs */ struct backing_dev_info bdi; struct delayed_work prune_tlinks; + struct rcu_head rcu; }; #endif /* _CIFS_FS_SB_H */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 77fc5e18107..849f6132b32 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -862,7 +862,7 @@ const struct inode_operations cifs_file_inode_ops = { const struct inode_operations cifs_symlink_inode_ops = { .readlink = generic_readlink, .follow_link = cifs_follow_link, - .put_link = cifs_put_link, + .put_link = kfree_put_link, .permission = cifs_permission, /* BB add the following two eventually */ /* revalidate: cifs_revalidate, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 6d0b07217ac..26a754f49ba 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -115,8 +115,6 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path); /* Functions related to symlinks */ extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd); -extern void cifs_put_link(struct dentry *direntry, - struct nameidata *nd, void *); extern int cifs_readlink(struct dentry *direntry, char __user *buffer, int buflen); extern int cifs_symlink(struct inode *inode, struct dentry *direntry, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 62a55147400..8813ff776ba 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3770,6 +3770,13 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, return rc; } +static void delayed_free(struct rcu_head *p) +{ + struct cifs_sb_info *sbi = container_of(p, struct cifs_sb_info, rcu); + unload_nls(sbi->local_nls); + kfree(sbi); +} + void cifs_umount(struct cifs_sb_info *cifs_sb) { @@ -3794,8 +3801,7 @@ cifs_umount(struct cifs_sb_info *cifs_sb) bdi_destroy(&cifs_sb->bdi); kfree(cifs_sb->mountdata); - unload_nls(cifs_sb->local_nls); - kfree(cifs_sb); + call_rcu(&cifs_sb->rcu, delayed_free); } int diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 7e36ceba0c7..cc0234710dd 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -621,10 +621,3 @@ symlink_exit: free_xid(xid); return rc; } - -void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie) -{ - char *p = nd_get_link(nd); - if (!IS_ERR(p)) - kfree(p); -} diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index cc0ea9fe5ec..e7550cb9fb7 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -40,7 +40,7 @@ extern const struct file_operations coda_ioctl_operations; int coda_open(struct inode *i, struct file *f); int coda_release(struct inode *i, struct file *f); int coda_permission(struct inode *inode, int mask); -int coda_revalidate_inode(struct dentry *); +int coda_revalidate_inode(struct inode *); int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); int coda_setattr(struct dentry *, struct iattr *); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 190effc6a6f..5efbb5ee0ad 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -387,9 +387,6 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx) BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op) - return -ENOTDIR; - if (host_file->f_op->iterate) { struct inode *host_inode = file_inode(host_file); mutex_lock(&host_inode->i_mutex); @@ -566,13 +563,12 @@ static int coda_dentry_delete(const struct dentry * dentry) * cache manager Venus issues a downcall to the kernel when this * happens */ -int coda_revalidate_inode(struct dentry *dentry) +int coda_revalidate_inode(struct inode *inode) { struct coda_vattr attr; int error; int old_mode; ino_t old_ino; - struct inode *inode = dentry->d_inode; struct coda_inode_info *cii = ITOC(inode); if (!cii->c_flags) diff --git a/fs/coda/file.c b/fs/coda/file.c index 380b798f844..9e83b779021 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -36,7 +36,7 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op || !host_file->f_op->read) + if (!host_file->f_op->read) return -EINVAL; return host_file->f_op->read(host_file, buf, count, ppos); @@ -75,7 +75,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op || !host_file->f_op->write) + if (!host_file->f_op->write) return -EINVAL; host_inode = file_inode(host_file); @@ -105,7 +105,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op || !host_file->f_op->mmap) + if (!host_file->f_op->mmap) return -ENODEV; coda_inode = file_inode(coda_file); diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 4dcc0d81a7a..506de34a4ef 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -257,7 +257,7 @@ static void coda_evict_inode(struct inode *inode) int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { - int err = coda_revalidate_inode(dentry); + int err = coda_revalidate_inode(dentry->d_inode); if (!err) generic_fillattr(dentry->d_inode, stat); return err; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 5d19acfa7c6..dc52e13d58e 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1583,13 +1583,13 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, /*FALL THROUGH*/ default: - if (f.file->f_op && f.file->f_op->compat_ioctl) { + if (f.file->f_op->compat_ioctl) { error = f.file->f_op->compat_ioctl(f.file, cmd, arg); if (error != -ENOIOCTLCMD) goto out_fput; } - if (!f.file->f_op || !f.file->f_op->unlocked_ioctl) + if (!f.file->f_op->unlocked_ioctl) goto do_ioctl; break; } diff --git a/fs/coredump.c b/fs/coredump.c index 9bdeca12ae0..62406b6959b 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -485,7 +485,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) return err; } -void do_coredump(siginfo_t *siginfo) +void do_coredump(const siginfo_t *siginfo) { struct core_state core_state; struct core_name cn; @@ -645,7 +645,7 @@ void do_coredump(siginfo_t *siginfo) */ if (!uid_eq(inode->i_uid, current_fsuid())) goto close_fail; - if (!cprm.file->f_op || !cprm.file->f_op->write) + if (!cprm.file->f_op->write) goto close_fail; if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) goto close_fail; @@ -685,40 +685,55 @@ fail: * do on a core-file: use only these functions to write out all the * necessary info. */ -int dump_write(struct file *file, const void *addr, int nr) +int dump_emit(struct coredump_params *cprm, const void *addr, int nr) { - return !dump_interrupted() && - access_ok(VERIFY_READ, addr, nr) && - file->f_op->write(file, addr, nr, &file->f_pos) == nr; + struct file *file = cprm->file; + loff_t pos = file->f_pos; + ssize_t n; + if (cprm->written + nr > cprm->limit) + return 0; + while (nr) { + if (dump_interrupted()) + return 0; + n = vfs_write(file, addr, nr, &pos); + if (n <= 0) + return 0; + file->f_pos = pos; + cprm->written += n; + nr -= n; + } + return 1; } -EXPORT_SYMBOL(dump_write); +EXPORT_SYMBOL(dump_emit); -int dump_seek(struct file *file, loff_t off) +int dump_skip(struct coredump_params *cprm, size_t nr) { - int ret = 1; - + static char zeroes[PAGE_SIZE]; + struct file *file = cprm->file; if (file->f_op->llseek && file->f_op->llseek != no_llseek) { + if (cprm->written + nr > cprm->limit) + return 0; if (dump_interrupted() || - file->f_op->llseek(file, off, SEEK_CUR) < 0) + file->f_op->llseek(file, nr, SEEK_CUR) < 0) return 0; + cprm->written += nr; + return 1; } else { - char *buf = (char *)get_zeroed_page(GFP_KERNEL); - - if (!buf) - return 0; - while (off > 0) { - unsigned long n = off; - - if (n > PAGE_SIZE) - n = PAGE_SIZE; - if (!dump_write(file, buf, n)) { - ret = 0; - break; - } - off -= n; + while (nr > PAGE_SIZE) { + if (!dump_emit(cprm, zeroes, PAGE_SIZE)) + return 0; + nr -= PAGE_SIZE; } - free_page((unsigned long)buf); + return dump_emit(cprm, zeroes, nr); } - return ret; } -EXPORT_SYMBOL(dump_seek); +EXPORT_SYMBOL(dump_skip); + +int dump_align(struct coredump_params *cprm, int align) +{ + unsigned mod = cprm->written & (align - 1); + if (align & (align - 1)) + return -EINVAL; + return mod ? dump_skip(cprm, align - mod) : 0; +} +EXPORT_SYMBOL(dump_align); diff --git a/fs/dcache.c b/fs/dcache.c index ae6ebb88cef..1f24cd684c5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -343,6 +343,7 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; + __d_clear_type(dentry); dentry->d_inode = NULL; hlist_del_init(&dentry->d_alias); dentry_rcuwalk_barrier(dentry); @@ -483,27 +484,6 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) return parent; } -/* - * Unhash a dentry without inserting an RCU walk barrier or checking that - * dentry->d_lock is locked. The caller must take care of that, if - * appropriate. - */ -static void __d_shrink(struct dentry *dentry) -{ - if (!d_unhashed(dentry)) { - struct hlist_bl_head *b; - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) - b = &dentry->d_sb->s_anon; - else - b = d_hash(dentry->d_parent, dentry->d_name.hash); - - hlist_bl_lock(b); - __hlist_bl_del(&dentry->d_hash); - dentry->d_hash.pprev = NULL; - hlist_bl_unlock(b); - } -} - /** * d_drop - drop a dentry * @dentry: dentry to drop @@ -522,7 +502,21 @@ static void __d_shrink(struct dentry *dentry) void __d_drop(struct dentry *dentry) { if (!d_unhashed(dentry)) { - __d_shrink(dentry); + struct hlist_bl_head *b; + /* + * Hashed dentries are normally on the dentry hashtable, + * with the exception of those newly allocated by + * d_obtain_alias, which are always IS_ROOT: + */ + if (unlikely(IS_ROOT(dentry))) + b = &dentry->d_sb->s_anon; + else + b = d_hash(dentry->d_parent, dentry->d_name.hash); + + hlist_bl_lock(b); + __hlist_bl_del(&dentry->d_hash); + dentry->d_hash.pprev = NULL; + hlist_bl_unlock(b); dentry_rcuwalk_barrier(dentry); } } @@ -1076,116 +1070,6 @@ void shrink_dcache_sb(struct super_block *sb) EXPORT_SYMBOL(shrink_dcache_sb); /* - * destroy a single subtree of dentries for unmount - * - see the comments on shrink_dcache_for_umount() for a description of the - * locking - */ -static void shrink_dcache_for_umount_subtree(struct dentry *dentry) -{ - struct dentry *parent; - - BUG_ON(!IS_ROOT(dentry)); - - for (;;) { - /* descend to the first leaf in the current subtree */ - while (!list_empty(&dentry->d_subdirs)) - dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); - - /* consume the dentries from this leaf up through its parents - * until we find one with children or run out altogether */ - do { - struct inode *inode; - - /* - * inform the fs that this dentry is about to be - * unhashed and destroyed. - */ - if ((dentry->d_flags & DCACHE_OP_PRUNE) && - !d_unhashed(dentry)) - dentry->d_op->d_prune(dentry); - - dentry_lru_del(dentry); - __d_shrink(dentry); - - if (dentry->d_lockref.count != 0) { - printk(KERN_ERR - "BUG: Dentry %p{i=%lx,n=%s}" - " still in use (%d)" - " [unmount of %s %s]\n", - dentry, - dentry->d_inode ? - dentry->d_inode->i_ino : 0UL, - dentry->d_name.name, - dentry->d_lockref.count, - dentry->d_sb->s_type->name, - dentry->d_sb->s_id); - BUG(); - } - - if (IS_ROOT(dentry)) { - parent = NULL; - list_del(&dentry->d_u.d_child); - } else { - parent = dentry->d_parent; - parent->d_lockref.count--; - list_del(&dentry->d_u.d_child); - } - - inode = dentry->d_inode; - if (inode) { - dentry->d_inode = NULL; - hlist_del_init(&dentry->d_alias); - if (dentry->d_op && dentry->d_op->d_iput) - dentry->d_op->d_iput(dentry, inode); - else - iput(inode); - } - - d_free(dentry); - - /* finished when we fall off the top of the tree, - * otherwise we ascend to the parent and move to the - * next sibling if there is one */ - if (!parent) - return; - dentry = parent; - } while (list_empty(&dentry->d_subdirs)); - - dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); - } -} - -/* - * destroy the dentries attached to a superblock on unmounting - * - we don't need to use dentry->d_lock because: - * - the superblock is detached from all mountings and open files, so the - * dentry trees will not be rearranged by the VFS - * - s_umount is write-locked, so the memory pressure shrinker will ignore - * any dentries belonging to this superblock that it comes across - * - the filesystem itself is no longer permitted to rearrange the dentries - * in this superblock - */ -void shrink_dcache_for_umount(struct super_block *sb) -{ - struct dentry *dentry; - - if (down_read_trylock(&sb->s_umount)) - BUG(); - - dentry = sb->s_root; - sb->s_root = NULL; - dentry->d_lockref.count--; - shrink_dcache_for_umount_subtree(dentry); - - while (!hlist_bl_empty(&sb->s_anon)) { - dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); - shrink_dcache_for_umount_subtree(dentry); - } -} - -/* * This tries to ascend one level of parenthood, but * we can race with renaming, so we need to re-check * the parenthood after dropping the lock and check @@ -1478,6 +1362,91 @@ void shrink_dcache_parent(struct dentry *parent) } EXPORT_SYMBOL(shrink_dcache_parent); +static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry) +{ + struct select_data *data = _data; + enum d_walk_ret ret = D_WALK_CONTINUE; + + if (dentry->d_lockref.count) { + dentry_lru_del(dentry); + if (likely(!list_empty(&dentry->d_subdirs))) + goto out; + if (dentry == data->start && dentry->d_lockref.count == 1) + goto out; + printk(KERN_ERR + "BUG: Dentry %p{i=%lx,n=%s}" + " still in use (%d)" + " [unmount of %s %s]\n", + dentry, + dentry->d_inode ? + dentry->d_inode->i_ino : 0UL, + dentry->d_name.name, + dentry->d_lockref.count, + dentry->d_sb->s_type->name, + dentry->d_sb->s_id); + BUG(); + } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { + /* + * We can't use d_lru_shrink_move() because we + * need to get the global LRU lock and do the + * LRU accounting. + */ + if (dentry->d_flags & DCACHE_LRU_LIST) + d_lru_del(dentry); + d_shrink_add(dentry, &data->dispose); + data->found++; + ret = D_WALK_NORETRY; + } +out: + if (data->found && need_resched()) + ret = D_WALK_QUIT; + return ret; +} + +/* + * destroy the dentries attached to a superblock on unmounting + */ +void shrink_dcache_for_umount(struct super_block *sb) +{ + struct dentry *dentry; + + if (down_read_trylock(&sb->s_umount)) + BUG(); + + dentry = sb->s_root; + sb->s_root = NULL; + for (;;) { + struct select_data data; + + INIT_LIST_HEAD(&data.dispose); + data.start = dentry; + data.found = 0; + + d_walk(dentry, &data, umount_collect, NULL); + if (!data.found) + break; + + shrink_dentry_list(&data.dispose); + cond_resched(); + } + d_drop(dentry); + dput(dentry); + + while (!hlist_bl_empty(&sb->s_anon)) { + struct select_data data; + dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); + + INIT_LIST_HEAD(&data.dispose); + data.start = NULL; + data.found = 0; + + d_walk(dentry, &data, umount_collect, NULL); + if (data.found) + shrink_dentry_list(&data.dispose); + cond_resched(); + } +} + static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry) { struct select_data *data = _data; @@ -1638,12 +1607,17 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +/** + * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) + * @sb: the superblock + * @name: qstr of the name + * + * For a filesystem that just pins its dentries in memory and never + * performs lookups at all, return an unhashed IS_ROOT dentry. + */ struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) { - struct dentry *dentry = __d_alloc(sb, name); - if (dentry) - dentry->d_flags |= DCACHE_DISCONNECTED; - return dentry; + return __d_alloc(sb, name); } EXPORT_SYMBOL(d_alloc_pseudo); @@ -1685,14 +1659,42 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) } EXPORT_SYMBOL(d_set_d_op); +static unsigned d_flags_for_inode(struct inode *inode) +{ + unsigned add_flags = DCACHE_FILE_TYPE; + + if (!inode) + return DCACHE_MISS_TYPE; + + if (S_ISDIR(inode->i_mode)) { + add_flags = DCACHE_DIRECTORY_TYPE; + if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) { + if (unlikely(!inode->i_op->lookup)) + add_flags = DCACHE_AUTODIR_TYPE; + else + inode->i_opflags |= IOP_LOOKUP; + } + } else if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { + if (unlikely(inode->i_op->follow_link)) + add_flags = DCACHE_SYMLINK_TYPE; + else + inode->i_opflags |= IOP_NOFOLLOW; + } + + if (unlikely(IS_AUTOMOUNT(inode))) + add_flags |= DCACHE_NEED_AUTOMOUNT; + return add_flags; +} + static void __d_instantiate(struct dentry *dentry, struct inode *inode) { + unsigned add_flags = d_flags_for_inode(inode); + spin_lock(&dentry->d_lock); - if (inode) { - if (unlikely(IS_AUTOMOUNT(inode))) - dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; + dentry->d_flags &= ~DCACHE_ENTRY_TYPE; + dentry->d_flags |= add_flags; + if (inode) hlist_add_head(&dentry->d_alias, &inode->i_dentry); - } dentry->d_inode = inode; dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); @@ -1801,6 +1803,33 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) EXPORT_SYMBOL(d_instantiate_unique); +/** + * d_instantiate_no_diralias - instantiate a non-aliased dentry + * @entry: dentry to complete + * @inode: inode to attach to this dentry + * + * Fill in inode information in the entry. If a directory alias is found, then + * return an error (and drop inode). Together with d_materialise_unique() this + * guarantees that a directory inode may never have more than one alias. + */ +int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode) +{ + BUG_ON(!hlist_unhashed(&entry->d_alias)); + + spin_lock(&inode->i_lock); + if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) { + spin_unlock(&inode->i_lock); + iput(inode); + return -EBUSY; + } + __d_instantiate(entry, inode); + spin_unlock(&inode->i_lock); + security_d_instantiate(entry, inode); + + return 0; +} +EXPORT_SYMBOL(d_instantiate_no_diralias); + struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; @@ -1870,6 +1899,7 @@ struct dentry *d_obtain_alias(struct inode *inode) static const struct qstr anonstring = QSTR_INIT("/", 1); struct dentry *tmp; struct dentry *res; + unsigned add_flags; if (!inode) return ERR_PTR(-ESTALE); @@ -1895,9 +1925,11 @@ struct dentry *d_obtain_alias(struct inode *inode) } /* attach a disconnected dentry */ + add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED; + spin_lock(&tmp->d_lock); tmp->d_inode = inode; - tmp->d_flags |= DCACHE_DISCONNECTED; + tmp->d_flags |= add_flags; hlist_add_head(&tmp->d_alias, &inode->i_dentry); hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); @@ -2725,7 +2757,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) spin_unlock(&dentry->d_lock); /* anon->d_lock still locked, returns locked */ - anon->d_flags &= ~DCACHE_DISCONNECTED; } /** @@ -2885,23 +2916,28 @@ static int prepend_path(const struct path *path, struct vfsmount *vfsmnt = path->mnt; struct mount *mnt = real_mount(vfsmnt); int error = 0; - unsigned seq = 0; + unsigned seq, m_seq = 0; char *bptr; int blen; rcu_read_lock(); +restart_mnt: + read_seqbegin_or_lock(&mount_lock, &m_seq); + seq = 0; restart: bptr = *buffer; blen = *buflen; + error = 0; read_seqbegin_or_lock(&rename_lock, &seq); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { + struct mount *parent = ACCESS_ONCE(mnt->mnt_parent); /* Global root? */ - if (mnt_has_parent(mnt)) { - dentry = mnt->mnt_mountpoint; - mnt = mnt->mnt_parent; + if (mnt != parent) { + dentry = ACCESS_ONCE(mnt->mnt_mountpoint); + mnt = parent; vfsmnt = &mnt->mnt; continue; } @@ -2935,6 +2971,11 @@ restart: goto restart; } done_seqretry(&rename_lock, seq); + if (need_seqretry(&mount_lock, m_seq)) { + m_seq = 1; + goto restart_mnt; + } + done_seqretry(&mount_lock, m_seq); if (error >= 0 && bptr == *buffer) { if (--blen < 0) @@ -2971,9 +3012,7 @@ char *__d_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); - br_read_lock(&vfsmount_lock); error = prepend_path(path, root, &res, &buflen); - br_read_unlock(&vfsmount_lock); if (error < 0) return ERR_PTR(error); @@ -2990,9 +3029,7 @@ char *d_absolute_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); - br_read_lock(&vfsmount_lock); error = prepend_path(path, &root, &res, &buflen); - br_read_unlock(&vfsmount_lock); if (error > 1) error = -EINVAL; @@ -3067,9 +3104,7 @@ char *d_path(const struct path *path, char *buf, int buflen) rcu_read_lock(); get_fs_root_rcu(current->fs, &root); - br_read_lock(&vfsmount_lock); error = path_with_deleted(path, &root, &res, &buflen); - br_read_unlock(&vfsmount_lock); rcu_read_unlock(); if (error < 0) @@ -3224,7 +3259,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); error = -ENOENT; - br_read_lock(&vfsmount_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; char *cwd = page + PATH_MAX; @@ -3232,7 +3266,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &root, &cwd, &buflen); - br_read_unlock(&vfsmount_lock); rcu_read_unlock(); if (error < 0) @@ -3253,7 +3286,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) error = -EFAULT; } } else { - br_read_unlock(&vfsmount_lock); rcu_read_unlock(); } diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index bf12ba5dd22..4000f6b3a75 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -44,15 +44,15 @@ */ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) { - struct dentry *lower_dentry; - int rc = 1; + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + int rc; + + if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) + return 1; if (flags & LOOKUP_RCU) return -ECHILD; - lower_dentry = ecryptfs_dentry_to_lower(dentry); - if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) - goto out; rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); if (dentry->d_inode) { struct inode *lower_inode = @@ -60,12 +60,17 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) fsstack_copy_attr_all(dentry->d_inode, lower_inode); } -out: return rc; } struct kmem_cache *ecryptfs_dentry_info_cache; +static void ecryptfs_dentry_free_rcu(struct rcu_head *head) +{ + kmem_cache_free(ecryptfs_dentry_info_cache, + container_of(head, struct ecryptfs_dentry_info, rcu)); +} + /** * ecryptfs_d_release * @dentry: The ecryptfs dentry @@ -74,15 +79,11 @@ struct kmem_cache *ecryptfs_dentry_info_cache; */ static void ecryptfs_d_release(struct dentry *dentry) { - if (ecryptfs_dentry_to_private(dentry)) { - if (ecryptfs_dentry_to_lower(dentry)) { - dput(ecryptfs_dentry_to_lower(dentry)); - mntput(ecryptfs_dentry_to_lower_mnt(dentry)); - } - kmem_cache_free(ecryptfs_dentry_info_cache, - ecryptfs_dentry_to_private(dentry)); + struct ecryptfs_dentry_info *p = dentry->d_fsdata; + if (p) { + path_put(&p->lower_path); + call_rcu(&p->rcu, ecryptfs_dentry_free_rcu); } - return; } const struct dentry_operations ecryptfs_dops = { diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index df19d34a033..90d1882b306 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -261,7 +261,10 @@ struct ecryptfs_inode_info { * vfsmount too. */ struct ecryptfs_dentry_info { struct path lower_path; - struct ecryptfs_crypt_stat *crypt_stat; + union { + struct ecryptfs_crypt_stat *crypt_stat; + struct rcu_head rcu; + }; }; /** @@ -512,13 +515,6 @@ ecryptfs_dentry_to_lower(struct dentry *dentry) return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry; } -static inline void -ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry) -{ - ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry = - lower_dentry; -} - static inline struct vfsmount * ecryptfs_dentry_to_lower_mnt(struct dentry *dentry) { @@ -531,13 +527,6 @@ ecryptfs_dentry_to_lower_path(struct dentry *dentry) return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path; } -static inline void -ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) -{ - ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt = - lower_mnt; -} - #define ecryptfs_printk(type, fmt, arg...) \ __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); __printf(1, 2) diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 992cf95830b..2229a74aeee 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -271,7 +271,7 @@ static int ecryptfs_flush(struct file *file, fl_owner_t td) { struct file *lower_file = ecryptfs_file_to_lower(file); - if (lower_file->f_op && lower_file->f_op->flush) { + if (lower_file->f_op->flush) { filemap_write_and_wait(file->f_mapping); return lower_file->f_op->flush(lower_file, td); } @@ -305,7 +305,7 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag) struct file *lower_file = NULL; lower_file = ecryptfs_file_to_lower(file); - if (lower_file->f_op && lower_file->f_op->fasync) + if (lower_file->f_op->fasync) rc = lower_file->f_op->fasync(fd, lower_file, flag); return rc; } @@ -318,7 +318,7 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (ecryptfs_file_to_private(file)) lower_file = ecryptfs_file_to_lower(file); - if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl) + if (lower_file->f_op->unlocked_ioctl) rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); return rc; } @@ -332,7 +332,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (ecryptfs_file_to_private(file)) lower_file = ecryptfs_file_to_lower(file); - if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl) + if (lower_file->f_op && lower_file->f_op->compat_ioctl) rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); return rc; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 67e9b633969..c36c4482447 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -153,7 +153,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); - rc = vfs_unlink(lower_dir_inode, lower_dentry); + rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL); if (rc) { printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); goto out_unlock; @@ -208,7 +208,7 @@ ecryptfs_do_create(struct inode *directory_inode, inode = __ecryptfs_get_inode(lower_dentry->d_inode, directory_inode->i_sb); if (IS_ERR(inode)) { - vfs_unlink(lower_dir_dentry->d_inode, lower_dentry); + vfs_unlink(lower_dir_dentry->d_inode, lower_dentry, NULL); goto out_lock; } fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode); @@ -361,8 +361,8 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry, BUG_ON(!d_count(lower_dentry)); ecryptfs_set_dentry_private(dentry, dentry_info); - ecryptfs_set_dentry_lower(dentry, lower_dentry); - ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); + dentry_info->lower_path.mnt = lower_mnt; + dentry_info->lower_path.dentry = lower_dentry; if (!lower_dentry->d_inode) { /* We want to add because we couldn't find in lower */ @@ -475,7 +475,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, dget(lower_new_dentry); lower_dir_dentry = lock_parent(lower_new_dentry); rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, - lower_new_dentry); + lower_new_dentry, NULL); if (rc || !lower_new_dentry->d_inode) goto out_lock; rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb); @@ -640,7 +640,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_lock; } rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, - lower_new_dir_dentry->d_inode, lower_new_dentry); + lower_new_dir_dentry->d_inode, lower_new_dentry, + NULL); if (rc) goto out_lock; if (target_inode) @@ -703,16 +704,6 @@ out: return NULL; } -static void -ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) -{ - char *buf = nd_get_link(nd); - if (!IS_ERR(buf)) { - /* Free the char* */ - kfree(buf); - } -} - /** * upper_size_to_lower_size * @crypt_stat: Crypt_stat associated with file @@ -891,7 +882,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); mutex_lock(&lower_dentry->d_inode->i_mutex); - rc = notify_change(lower_dentry, &lower_ia); + rc = notify_change(lower_dentry, &lower_ia, NULL); mutex_unlock(&lower_dentry->d_inode->i_mutex); } return rc; @@ -992,7 +983,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) lower_ia.ia_valid &= ~ATTR_MODE; mutex_lock(&lower_dentry->d_inode->i_mutex); - rc = notify_change(lower_dentry, &lower_ia); + rc = notify_change(lower_dentry, &lower_ia, NULL); mutex_unlock(&lower_dentry->d_inode->i_mutex); out: fsstack_copy_attr_all(inode, lower_inode); @@ -1121,7 +1112,7 @@ out: const struct inode_operations ecryptfs_symlink_iops = { .readlink = generic_readlink, .follow_link = ecryptfs_follow_link, - .put_link = ecryptfs_put_link, + .put_link = kfree_put_link, .permission = ecryptfs_permission, .setattr = ecryptfs_setattr, .getattr = ecryptfs_getattr_link, diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index eb1c5979eca..1b119d3bf92 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -585,8 +585,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags /* ->kill_sb() will take care of root_info */ ecryptfs_set_dentry_private(s->s_root, root_info); - ecryptfs_set_dentry_lower(s->s_root, path.dentry); - ecryptfs_set_dentry_lower_mnt(s->s_root, path.mnt); + root_info->lower_path = path; s->s_flags |= MS_ACTIVE; return dget(s->s_root); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 810c28fb8c3..983e3960abf 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1814,7 +1814,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, /* The target file descriptor must support poll */ error = -EPERM; - if (!tf.file->f_op || !tf.file->f_op->poll) + if (!tf.file->f_op->poll) goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ diff --git a/fs/exec.c b/fs/exec.c index 2ea437e5acf..be4c81c7251 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -106,6 +106,7 @@ static inline void put_binfmt(struct linux_binfmt * fmt) */ SYSCALL_DEFINE1(uselib, const char __user *, library) { + struct linux_binfmt *fmt; struct file *file; struct filename *tmp = getname(library); int error = PTR_ERR(tmp); @@ -136,24 +137,21 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) fsnotify_open(file); error = -ENOEXEC; - if(file->f_op) { - struct linux_binfmt * fmt; - read_lock(&binfmt_lock); - list_for_each_entry(fmt, &formats, lh) { - if (!fmt->load_shlib) - continue; - if (!try_module_get(fmt->module)) - continue; - read_unlock(&binfmt_lock); - error = fmt->load_shlib(file); - read_lock(&binfmt_lock); - put_binfmt(fmt); - if (error != -ENOEXEC) - break; - } + read_lock(&binfmt_lock); + list_for_each_entry(fmt, &formats, lh) { + if (!fmt->load_shlib) + continue; + if (!try_module_get(fmt->module)) + continue; read_unlock(&binfmt_lock); + error = fmt->load_shlib(file); + read_lock(&binfmt_lock); + put_binfmt(fmt); + if (error != -ENOEXEC) + break; } + read_unlock(&binfmt_lock); exit: fput(file); out: @@ -1277,13 +1275,10 @@ static int check_unsafe_exec(struct linux_binprm *bprm) */ int prepare_binprm(struct linux_binprm *bprm) { - umode_t mode; - struct inode * inode = file_inode(bprm->file); + struct inode *inode = file_inode(bprm->file); + umode_t mode = inode->i_mode; int retval; - mode = inode->i_mode; - if (bprm->file->f_op == NULL) - return -EACCES; /* clear any previous set[ug]id data from a previous binary */ bprm->cred->euid = current_euid(); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index a235f001688..48a359dd286 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -69,145 +69,162 @@ find_acceptable_alias(struct dentry *result, return NULL; } -/* - * Find root of a disconnected subtree and return a reference to it. - */ -static struct dentry * -find_disconnected_root(struct dentry *dentry) +static bool dentry_connected(struct dentry *dentry) { dget(dentry); - while (!IS_ROOT(dentry)) { + while (dentry->d_flags & DCACHE_DISCONNECTED) { struct dentry *parent = dget_parent(dentry); - if (!(parent->d_flags & DCACHE_DISCONNECTED)) { + dput(dentry); + if (IS_ROOT(dentry)) { dput(parent); - break; + return false; } + dentry = parent; + } + dput(dentry); + return true; +} + +static void clear_disconnected(struct dentry *dentry) +{ + dget(dentry); + while (dentry->d_flags & DCACHE_DISCONNECTED) { + struct dentry *parent = dget_parent(dentry); + + WARN_ON_ONCE(IS_ROOT(dentry)); + + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_DISCONNECTED; + spin_unlock(&dentry->d_lock); dput(dentry); dentry = parent; } - return dentry; + dput(dentry); +} + +/* + * Reconnect a directory dentry with its parent. + * + * This can return a dentry, or NULL, or an error. + * + * In the first case the returned dentry is the parent of the given + * dentry, and may itself need to be reconnected to its parent. + * + * In the NULL case, a concurrent VFS operation has either renamed or + * removed this directory. The concurrent operation has reconnected our + * dentry, so we no longer need to. + */ +static struct dentry *reconnect_one(struct vfsmount *mnt, + struct dentry *dentry, char *nbuf) +{ + struct dentry *parent; + struct dentry *tmp; + int err; + + parent = ERR_PTR(-EACCES); + mutex_lock(&dentry->d_inode->i_mutex); + if (mnt->mnt_sb->s_export_op->get_parent) + parent = mnt->mnt_sb->s_export_op->get_parent(dentry); + mutex_unlock(&dentry->d_inode->i_mutex); + + if (IS_ERR(parent)) { + dprintk("%s: get_parent of %ld failed, err %d\n", + __func__, dentry->d_inode->i_ino, PTR_ERR(parent)); + return parent; + } + + dprintk("%s: find name of %lu in %lu\n", __func__, + dentry->d_inode->i_ino, parent->d_inode->i_ino); + err = exportfs_get_name(mnt, parent, nbuf, dentry); + if (err == -ENOENT) + goto out_reconnected; + if (err) + goto out_err; + dprintk("%s: found name: %s\n", __func__, nbuf); + mutex_lock(&parent->d_inode->i_mutex); + tmp = lookup_one_len(nbuf, parent, strlen(nbuf)); + mutex_unlock(&parent->d_inode->i_mutex); + if (IS_ERR(tmp)) { + dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); + goto out_err; + } + if (tmp != dentry) { + dput(tmp); + goto out_reconnected; + } + dput(tmp); + if (IS_ROOT(dentry)) { + err = -ESTALE; + goto out_err; + } + return parent; + +out_err: + dput(parent); + return ERR_PTR(err); +out_reconnected: + dput(parent); + /* + * Someone must have renamed our entry into another parent, in + * which case it has been reconnected by the rename. + * + * Or someone removed it entirely, in which case filehandle + * lookup will succeed but the directory is now IS_DEAD and + * subsequent operations on it will fail. + * + * Alternatively, maybe there was no race at all, and the + * filesystem is just corrupt and gave us a parent that doesn't + * actually contain any entry pointing to this inode. So, + * double check that this worked and return -ESTALE if not: + */ + if (!dentry_connected(dentry)) + return ERR_PTR(-ESTALE); + return NULL; } /* * Make sure target_dir is fully connected to the dentry tree. * - * It may already be, as the flag isn't always updated when connection happens. + * On successful return, DCACHE_DISCONNECTED will be cleared on + * target_dir, and target_dir->d_parent->...->d_parent will reach the + * root of the filesystem. + * + * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected. + * But the converse is not true: target_dir may have DCACHE_DISCONNECTED + * set but already be connected. In that case we'll verify the + * connection to root and then clear the flag. + * + * Note that target_dir could be removed by a concurrent operation. In + * that case reconnect_path may still succeed with target_dir fully + * connected, but further operations using the filehandle will fail when + * necessary (due to S_DEAD being set on the directory). */ static int reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) { - int noprogress = 0; - int err = -ESTALE; + struct dentry *dentry, *parent; - /* - * It is possible that a confused file system might not let us complete - * the path to the root. For example, if get_parent returns a directory - * in which we cannot find a name for the child. While this implies a - * very sick filesystem we don't want it to cause knfsd to spin. Hence - * the noprogress counter. If we go through the loop 10 times (2 is - * probably enough) without getting anywhere, we just give up - */ - while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) { - struct dentry *pd = find_disconnected_root(target_dir); - - if (!IS_ROOT(pd)) { - /* must have found a connected parent - great */ - spin_lock(&pd->d_lock); - pd->d_flags &= ~DCACHE_DISCONNECTED; - spin_unlock(&pd->d_lock); - noprogress = 0; - } else if (pd == mnt->mnt_sb->s_root) { - printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n"); - spin_lock(&pd->d_lock); - pd->d_flags &= ~DCACHE_DISCONNECTED; - spin_unlock(&pd->d_lock); - noprogress = 0; - } else { - /* - * We have hit the top of a disconnected path, try to - * find parent and connect. - * - * Racing with some other process renaming a directory - * isn't much of a problem here. If someone renames - * the directory, it will end up properly connected, - * which is what we want - * - * Getting the parent can't be supported generically, - * the locking is too icky. - * - * Instead we just return EACCES. If server reboots - * or inodes get flushed, you lose - */ - struct dentry *ppd = ERR_PTR(-EACCES); - struct dentry *npd; - - mutex_lock(&pd->d_inode->i_mutex); - if (mnt->mnt_sb->s_export_op->get_parent) - ppd = mnt->mnt_sb->s_export_op->get_parent(pd); - mutex_unlock(&pd->d_inode->i_mutex); - - if (IS_ERR(ppd)) { - err = PTR_ERR(ppd); - dprintk("%s: get_parent of %ld failed, err %d\n", - __func__, pd->d_inode->i_ino, err); - dput(pd); - break; - } + dentry = dget(target_dir); - dprintk("%s: find name of %lu in %lu\n", __func__, - pd->d_inode->i_ino, ppd->d_inode->i_ino); - err = exportfs_get_name(mnt, ppd, nbuf, pd); - if (err) { - dput(ppd); - dput(pd); - if (err == -ENOENT) - /* some race between get_parent and - * get_name? just try again - */ - continue; - break; - } - dprintk("%s: found name: %s\n", __func__, nbuf); - mutex_lock(&ppd->d_inode->i_mutex); - npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); - mutex_unlock(&ppd->d_inode->i_mutex); - if (IS_ERR(npd)) { - err = PTR_ERR(npd); - dprintk("%s: lookup failed: %d\n", - __func__, err); - dput(ppd); - dput(pd); - break; - } - /* we didn't really want npd, we really wanted - * a side-effect of the lookup. - * hopefully, npd == pd, though it isn't really - * a problem if it isn't - */ - if (npd == pd) - noprogress = 0; - else - printk("%s: npd != pd\n", __func__); - dput(npd); - dput(ppd); - if (IS_ROOT(pd)) { - /* something went wrong, we have to give up */ - dput(pd); - break; - } - } - dput(pd); - } + while (dentry->d_flags & DCACHE_DISCONNECTED) { + BUG_ON(dentry == mnt->mnt_sb->s_root); - if (target_dir->d_flags & DCACHE_DISCONNECTED) { - /* something went wrong - oh-well */ - if (!err) - err = -ESTALE; - return err; - } + if (IS_ROOT(dentry)) + parent = reconnect_one(mnt, dentry, nbuf); + else + parent = dget_parent(dentry); + if (!parent) + break; + dput(dentry); + if (IS_ERR(parent)) + return PTR_ERR(parent); + dentry = parent; + } + dput(dentry); + clear_disconnected(target_dir); return 0; } @@ -215,7 +232,7 @@ struct getdents_callback { struct dir_context ctx; char *name; /* name that was found. It already points to a buffer NAME_MAX+1 is size */ - unsigned long ino; /* the inum we are looking for */ + u64 ino; /* the inum we are looking for */ int found; /* inode matched? */ int sequence; /* sequence counter */ }; @@ -255,10 +272,14 @@ static int get_name(const struct path *path, char *name, struct dentry *child) struct inode *dir = path->dentry->d_inode; int error; struct file *file; + struct kstat stat; + struct path child_path = { + .mnt = path->mnt, + .dentry = child, + }; struct getdents_callback buffer = { .ctx.actor = filldir_one, .name = name, - .ino = child->d_inode->i_ino }; error = -ENOTDIR; @@ -268,6 +289,16 @@ static int get_name(const struct path *path, char *name, struct dentry *child) if (!dir->i_fop) goto out; /* + * inode->i_ino is unsigned long, kstat->ino is u64, so the + * former would be insufficient on 32-bit hosts when the + * filesystem supports 64-bit inode numbers. So we need to + * actually call ->getattr, not just read i_ino: + */ + error = vfs_getattr_nosec(&child_path, &stat); + if (error) + return error; + buffer.ino = stat.ino; + /* * Open the directory ... */ file = dentry_open(path, O_RDONLY, cred); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index af815ea9d7c..d01d62315f7 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2734,8 +2734,6 @@ extern void ext4_double_down_write_data_sem(struct inode *first, struct inode *second); extern void ext4_double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode); -void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2); -void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2); extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 start_orig, __u64 start_donor, __u64 len, __u64 *moved_len); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a569d335f80..60589b60e9b 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -130,7 +130,7 @@ static long swap_inode_boot_loader(struct super_block *sb, /* Protect orig inodes against a truncate and make sure, * that only 1 swap_inode_boot_loader is running. */ - ext4_inode_double_lock(inode, inode_bl); + lock_two_nondirectories(inode, inode_bl); truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode_bl->i_data, 0); @@ -205,7 +205,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ext4_inode_resume_unlocked_dio(inode); ext4_inode_resume_unlocked_dio(inode_bl); - ext4_inode_double_unlock(inode, inode_bl); + unlock_two_nondirectories(inode, inode_bl); iput(inode_bl); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 7fa4d855dbd..773b503bd18 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -1203,42 +1203,6 @@ mext_check_arguments(struct inode *orig_inode, } /** - * ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2 - * - * @inode1: the inode structure - * @inode2: the inode structure - * - * Lock two inodes' i_mutex - */ -void -ext4_inode_double_lock(struct inode *inode1, struct inode *inode2) -{ - BUG_ON(inode1 == inode2); - if (inode1 < inode2) { - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); - } -} - -/** - * ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2 - * - * @inode1: the inode that is released first - * @inode2: the inode that is released second - * - */ - -void -ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2) -{ - mutex_unlock(&inode1->i_mutex); - mutex_unlock(&inode2->i_mutex); -} - -/** * ext4_move_extents - Exchange the specified range of a file * * @o_filp: file structure of the original file @@ -1327,7 +1291,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, return -EINVAL; } /* Protect orig and donor inodes against a truncate */ - ext4_inode_double_lock(orig_inode, donor_inode); + lock_two_nondirectories(orig_inode, donor_inode); /* Wait for all existing dio workers */ ext4_inode_block_unlocked_dio(orig_inode); @@ -1535,7 +1499,7 @@ out: ext4_double_up_write_data_sem(orig_inode, donor_inode); ext4_inode_resume_unlocked_dio(orig_inode); ext4_inode_resume_unlocked_dio(donor_inode); - ext4_inode_double_unlock(orig_inode, donor_inode); + unlock_two_nondirectories(orig_inode, donor_inode); return ret; } diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 4241e6f39e8..7c31f4bc74a 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -102,6 +102,7 @@ struct msdos_sb_info { struct hlist_head dir_hashtable[FAT_HASH_SIZE]; unsigned int dirty; /* fs state before mount */ + struct rcu_head rcu; }; #define FAT_CACHE_VALID 0 /* special case for valid cache */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 0062da21dd8..854b578f669 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -548,6 +548,16 @@ static void fat_set_state(struct super_block *sb, brelse(bh); } +static void delayed_free(struct rcu_head *p) +{ + struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu); + unload_nls(sbi->nls_disk); + unload_nls(sbi->nls_io); + if (sbi->options.iocharset != fat_default_iocharset) + kfree(sbi->options.iocharset); + kfree(sbi); +} + static void fat_put_super(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); @@ -557,14 +567,7 @@ static void fat_put_super(struct super_block *sb) iput(sbi->fsinfo_inode); iput(sbi->fat_inode); - unload_nls(sbi->nls_disk); - unload_nls(sbi->nls_io); - - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); - - sb->s_fs_info = NULL; - kfree(sbi); + call_rcu(&sbi->rcu, delayed_free); } static struct kmem_cache *fat_inode_cachep; diff --git a/fs/fcntl.c b/fs/fcntl.c index 65343c3741f..ef6866592a0 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -56,7 +56,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) return -EINVAL; } - if (filp->f_op && filp->f_op->check_flags) + if (filp->f_op->check_flags) error = filp->f_op->check_flags(arg); if (error) return error; @@ -64,8 +64,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) /* * ->fasync() is responsible for setting the FASYNC bit. */ - if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op && - filp->f_op->fasync) { + if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) { error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); if (error < 0) goto out; diff --git a/fs/file_table.c b/fs/file_table.c index e900ca51863..5fff9030be3 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -36,8 +36,6 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -DEFINE_STATIC_LGLOCK(files_lglock); - /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -134,7 +132,6 @@ struct file *get_empty_filp(void) return ERR_PTR(error); } - INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); spin_lock_init(&f->f_lock); @@ -240,11 +237,11 @@ static void __fput(struct file *file) locks_remove_flock(file); if (unlikely(file->f_flags & FASYNC)) { - if (file->f_op && file->f_op->fasync) + if (file->f_op->fasync) file->f_op->fasync(-1, file, 0); } ima_file_free(file); - if (file->f_op && file->f_op->release) + if (file->f_op->release) file->f_op->release(inode, file); security_file_free(file); if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && @@ -304,7 +301,6 @@ void fput(struct file *file) if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; - file_sb_list_del(file); if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { init_task_work(&file->f_u.fu_rcuhead, ____fput); if (!task_work_add(task, &file->f_u.fu_rcuhead, true)) @@ -333,7 +329,6 @@ void __fput_sync(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; - file_sb_list_del(file); BUG_ON(!(task->flags & PF_KTHREAD)); __fput(file); } @@ -345,129 +340,10 @@ void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); - file_sb_list_del(file); file_free(file); } } -static inline int file_list_cpu(struct file *file) -{ -#ifdef CONFIG_SMP - return file->f_sb_list_cpu; -#else - return smp_processor_id(); -#endif -} - -/* helper for file_sb_list_add to reduce ifdefs */ -static inline void __file_sb_list_add(struct file *file, struct super_block *sb) -{ - struct list_head *list; -#ifdef CONFIG_SMP - int cpu; - cpu = smp_processor_id(); - file->f_sb_list_cpu = cpu; - list = per_cpu_ptr(sb->s_files, cpu); -#else - list = &sb->s_files; -#endif - list_add(&file->f_u.fu_list, list); -} - -/** - * file_sb_list_add - add a file to the sb's file list - * @file: file to add - * @sb: sb to add it to - * - * Use this function to associate a file with the superblock of the inode it - * refers to. - */ -void file_sb_list_add(struct file *file, struct super_block *sb) -{ - if (likely(!(file->f_mode & FMODE_WRITE))) - return; - if (!S_ISREG(file_inode(file)->i_mode)) - return; - lg_local_lock(&files_lglock); - __file_sb_list_add(file, sb); - lg_local_unlock(&files_lglock); -} - -/** - * file_sb_list_del - remove a file from the sb's file list - * @file: file to remove - * @sb: sb to remove it from - * - * Use this function to remove a file from its superblock. - */ -void file_sb_list_del(struct file *file) -{ - if (!list_empty(&file->f_u.fu_list)) { - lg_local_lock_cpu(&files_lglock, file_list_cpu(file)); - list_del_init(&file->f_u.fu_list); - lg_local_unlock_cpu(&files_lglock, file_list_cpu(file)); - } -} - -#ifdef CONFIG_SMP - -/* - * These macros iterate all files on all CPUs for a given superblock. - * files_lglock must be held globally. - */ -#define do_file_list_for_each_entry(__sb, __file) \ -{ \ - int i; \ - for_each_possible_cpu(i) { \ - struct list_head *list; \ - list = per_cpu_ptr((__sb)->s_files, i); \ - list_for_each_entry((__file), list, f_u.fu_list) - -#define while_file_list_for_each_entry \ - } \ -} - -#else - -#define do_file_list_for_each_entry(__sb, __file) \ -{ \ - struct list_head *list; \ - list = &(sb)->s_files; \ - list_for_each_entry((__file), list, f_u.fu_list) - -#define while_file_list_for_each_entry \ -} - -#endif - -/** - * mark_files_ro - mark all files read-only - * @sb: superblock in question - * - * All files are marked read-only. We don't care about pending - * delete files so this should be used in 'force' mode only. - */ -void mark_files_ro(struct super_block *sb) -{ - struct file *f; - - lg_global_lock(&files_lglock); - do_file_list_for_each_entry(sb, f) { - if (!file_count(f)) - continue; - if (!(f->f_mode & FMODE_WRITE)) - continue; - spin_lock(&f->f_lock); - f->f_mode &= ~FMODE_WRITE; - spin_unlock(&f->f_lock); - if (file_check_writeable(f) != 0) - continue; - __mnt_drop_write(f->f_path.mnt); - file_release_write(f); - } while_file_list_for_each_entry; - lg_global_unlock(&files_lglock); -} - void __init files_init(unsigned long mempages) { unsigned long n; @@ -483,6 +359,5 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); - lg_lock_init(&files_lglock, "files_lglock"); percpu_counter_init(&nr_files, 0); } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 9f4935b8f20..09c11329a17 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -26,6 +26,7 @@ #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/tracepoint.h> +#include <linux/device.h> #include "internal.h" /* diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 24da581cb52..b96a49b37d6 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -473,7 +473,7 @@ err: static void cuse_fc_release(struct fuse_conn *fc) { struct cuse_conn *cc = fc_to_cc(fc); - kfree(cc); + kfree_rcu(cc, fc.rcu); } /** diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b7989f2ab4c..c3eb2c46c8f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -342,24 +342,6 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, return err; } -static struct dentry *fuse_materialise_dentry(struct dentry *dentry, - struct inode *inode) -{ - struct dentry *newent; - - if (inode && S_ISDIR(inode->i_mode)) { - struct fuse_conn *fc = get_fuse_conn(inode); - - mutex_lock(&fc->inst_mutex); - newent = d_materialise_unique(dentry, inode); - mutex_unlock(&fc->inst_mutex); - } else { - newent = d_materialise_unique(dentry, inode); - } - - return newent; -} - static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, unsigned int flags) { @@ -382,7 +364,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, if (inode && get_node_id(inode) == FUSE_ROOT_ID) goto out_iput; - newent = fuse_materialise_dentry(entry, inode); + newent = d_materialise_unique(entry, inode); err = PTR_ERR(newent); if (IS_ERR(newent)) goto out_err; @@ -601,21 +583,9 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, } kfree(forget); - if (S_ISDIR(inode->i_mode)) { - struct dentry *alias; - mutex_lock(&fc->inst_mutex); - alias = d_find_alias(inode); - if (alias) { - /* New directory must have moved since mkdir */ - mutex_unlock(&fc->inst_mutex); - dput(alias); - iput(inode); - return -EBUSY; - } - d_instantiate(entry, inode); - mutex_unlock(&fc->inst_mutex); - } else - d_instantiate(entry, inode); + err = d_instantiate_no_diralias(entry, inode); + if (err) + return err; fuse_change_entry_timeout(entry, &outarg); fuse_invalidate_attr(dir); @@ -1284,7 +1254,7 @@ static int fuse_direntplus_link(struct file *file, if (!inode) goto out; - alias = fuse_materialise_dentry(dentry, inode); + alias = d_materialise_unique(dentry, inode); err = PTR_ERR(alias); if (IS_ERR(alias)) goto out; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 643274852c8..7d273091266 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -375,12 +375,11 @@ struct fuse_conn { /** Lock protecting accessess to members of this structure */ spinlock_t lock; - /** Mutex protecting against directory alias creation */ - struct mutex inst_mutex; - /** Refcount */ atomic_t count; + struct rcu_head rcu; + /** The user id for this mount */ kuid_t user_id; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index a8ce6dab60a..d468643a68b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -565,7 +565,6 @@ void fuse_conn_init(struct fuse_conn *fc) { memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); - mutex_init(&fc->inst_mutex); init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); init_waitqueue_head(&fc->waitq); @@ -596,7 +595,6 @@ void fuse_conn_put(struct fuse_conn *fc) if (atomic_dec_and_test(&fc->count)) { if (fc->destroy_req) fuse_request_free(fc->destroy_req); - mutex_destroy(&fc->inst_mutex); fc->release(fc); } } @@ -920,7 +918,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) static void fuse_free_conn(struct fuse_conn *fc) { - kfree(fc); + kfree_rcu(fc, rcu); } static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 109ce9325b7..1615df16cf4 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1514,13 +1514,6 @@ out: return NULL; } -static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p) -{ - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - kfree(s); -} - /** * gfs2_permission - * @inode: The inode @@ -1872,7 +1865,7 @@ const struct inode_operations gfs2_dir_iops = { const struct inode_operations gfs2_symlink_iops = { .readlink = generic_readlink, .follow_link = gfs2_follow_link, - .put_link = gfs2_put_link, + .put_link = kfree_put_link, .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 1b398636e99..6797bf80f6e 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -80,6 +80,7 @@ struct hpfs_sb_info { unsigned sb_c_bitmap; /* current bitmap */ unsigned sb_max_fwd_alloc; /* max forwad allocation */ int sb_timeshift; + struct rcu_head rcu; }; /* Four 512-byte buffers and the 2k block obtained by concatenating them */ diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 345713d2f8f..1b39afdd86f 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -407,7 +407,7 @@ again: /*printk("HPFS: truncating file before delete.\n");*/ newattrs.ia_size = 0; newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; - err = notify_change(dentry, &newattrs); + err = notify_change(dentry, &newattrs, NULL); put_write_access(inode); if (!err) goto again; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 4334cda8dba..b8d01ef6f53 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -101,18 +101,24 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2, return 0; } -static void hpfs_put_super(struct super_block *s) +static void free_sbi(struct hpfs_sb_info *sbi) { - struct hpfs_sb_info *sbi = hpfs_sb(s); + kfree(sbi->sb_cp_table); + kfree(sbi->sb_bmp_dir); + kfree(sbi); +} +static void lazy_free_sbi(struct rcu_head *rcu) +{ + free_sbi(container_of(rcu, struct hpfs_sb_info, rcu)); +} + +static void hpfs_put_super(struct super_block *s) +{ hpfs_lock(s); unmark_dirty(s); hpfs_unlock(s); - - kfree(sbi->sb_cp_table); - kfree(sbi->sb_bmp_dir); - s->s_fs_info = NULL; - kfree(sbi); + call_rcu(&hpfs_sb(s)->rcu, lazy_free_sbi); } unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) @@ -485,9 +491,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) } s->s_fs_info = sbi; - sbi->sb_bmp_dir = NULL; - sbi->sb_cp_table = NULL; - mutex_init(&sbi->hpfs_mutex); hpfs_lock(s); @@ -679,10 +682,7 @@ bail2: brelse(bh0); bail1: bail0: hpfs_unlock(s); - kfree(sbi->sb_bmp_dir); - kfree(sbi->sb_cp_table); - s->s_fs_info = NULL; - kfree(sbi); + free_sbi(sbi); return -EINVAL; } diff --git a/fs/inode.c b/fs/inode.c index b33ba8e021c..4bcdad3c936 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -773,15 +773,11 @@ static struct inode *find_inode(struct super_block *sb, repeat: hlist_for_each_entry(inode, head, i_hash) { - spin_lock(&inode->i_lock); - if (inode->i_sb != sb) { - spin_unlock(&inode->i_lock); + if (inode->i_sb != sb) continue; - } - if (!test(inode, data)) { - spin_unlock(&inode->i_lock); + if (!test(inode, data)) continue; - } + spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; @@ -804,15 +800,11 @@ static struct inode *find_inode_fast(struct super_block *sb, repeat: hlist_for_each_entry(inode, head, i_hash) { - spin_lock(&inode->i_lock); - if (inode->i_ino != ino) { - spin_unlock(&inode->i_lock); + if (inode->i_ino != ino) continue; - } - if (inode->i_sb != sb) { - spin_unlock(&inode->i_lock); + if (inode->i_sb != sb) continue; - } + spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; @@ -951,6 +943,42 @@ void unlock_new_inode(struct inode *inode) EXPORT_SYMBOL(unlock_new_inode); /** + * lock_two_nondirectories - take two i_mutexes on non-directory objects + * @inode1: first inode to lock + * @inode2: second inode to lock + */ +void lock_two_nondirectories(struct inode *inode1, struct inode *inode2) +{ + WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); + if (inode1 == inode2 || !inode2) { + mutex_lock(&inode1->i_mutex); + return; + } + WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); + if (inode1 < inode2) { + mutex_lock(&inode1->i_mutex); + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2); + } else { + mutex_lock(&inode2->i_mutex); + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_NONDIR2); + } +} +EXPORT_SYMBOL(lock_two_nondirectories); + +/** + * unlock_two_nondirectories - release locks from lock_two_nondirectories() + * @inode1: first inode to unlock + * @inode2: second inode to unlock + */ +void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2) +{ + mutex_unlock(&inode1->i_mutex); + if (inode2 && inode2 != inode1) + mutex_unlock(&inode2->i_mutex); +} +EXPORT_SYMBOL(unlock_two_nondirectories); + +/** * iget5_locked - obtain an inode from a mounted file system * @sb: super block of file system * @hashval: hash value (usually inode number) to get @@ -1575,7 +1603,11 @@ static int __remove_suid(struct dentry *dentry, int kill) struct iattr newattrs; newattrs.ia_valid = ATTR_FORCE | kill; - return notify_change(dentry, &newattrs); + /* + * Note we call this on write, so notify_change will not + * encounter any conflicting delegations: + */ + return notify_change(dentry, &newattrs, NULL); } int file_remove_suid(struct file *file) diff --git a/fs/internal.h b/fs/internal.h index 513e0d859a6..46574240746 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -9,8 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/lglock.h> - struct super_block; struct file_system_type; struct linux_binprm; @@ -62,8 +60,6 @@ extern int sb_prepare_remount_readonly(struct super_block *); extern void __init mnt_init(void); -extern struct lglock vfsmount_lock; - extern int __mnt_want_write(struct vfsmount *); extern int __mnt_want_write_file(struct file *); extern void __mnt_drop_write(struct vfsmount *); @@ -77,9 +73,6 @@ extern void chroot_fs_refs(const struct path *, const struct path *); /* * file_table.c */ -extern void file_sb_list_add(struct file *f, struct super_block *sb); -extern void file_sb_list_del(struct file *f); -extern void mark_files_ro(struct super_block *); extern struct file *get_empty_filp(void); /* diff --git a/fs/ioctl.c b/fs/ioctl.c index fd507fb460f..8ac3fad3619 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -37,7 +37,7 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd, { int error = -ENOTTY; - if (!filp->f_op || !filp->f_op->unlocked_ioctl) + if (!filp->f_op->unlocked_ioctl) goto out; error = filp->f_op->unlocked_ioctl(filp, cmd, arg); @@ -501,7 +501,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp, /* Did FASYNC state change ? */ if ((flag ^ filp->f_flags) & FASYNC) { - if (filp->f_op && filp->f_op->fasync) + if (filp->f_op->fasync) /* fasync() adjusts filp->f_flags */ error = filp->f_op->fasync(fd, filp, on); else diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index e5d408a7ea4..4a9e10ea13f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -181,7 +181,7 @@ struct iso9660_options{ * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms) +isofs_hash_common(struct qstr *qstr, int ms) { const char *name; int len; @@ -202,7 +202,7 @@ isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms) * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms) +isofs_hashi_common(struct qstr *qstr, int ms) { const char *name; int len; @@ -259,13 +259,13 @@ static int isofs_dentry_cmp_common( static int isofs_hash(const struct dentry *dentry, struct qstr *qstr) { - return isofs_hash_common(dentry, qstr, 0); + return isofs_hash_common(qstr, 0); } static int isofs_hashi(const struct dentry *dentry, struct qstr *qstr) { - return isofs_hashi_common(dentry, qstr, 0); + return isofs_hashi_common(qstr, 0); } static int @@ -286,13 +286,13 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, static int isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) { - return isofs_hash_common(dentry, qstr, 1); + return isofs_hash_common(qstr, 1); } static int isofs_hashi_ms(const struct dentry *dentry, struct qstr *qstr) { - return isofs_hashi_common(dentry, qstr, 1); + return isofs_hashi_common(qstr, 1); } static int diff --git a/fs/libfs.c b/fs/libfs.c index 3a3a9b53bf5..5de06947ba5 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -10,6 +10,7 @@ #include <linux/vfs.h> #include <linux/quotaops.h> #include <linux/mutex.h> +#include <linux/namei.h> #include <linux/exportfs.h> #include <linux/writeback.h> #include <linux/buffer_head.h> /* sync_mapping_buffers */ @@ -31,6 +32,7 @@ int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9); return 0; } +EXPORT_SYMBOL(simple_getattr); int simple_statfs(struct dentry *dentry, struct kstatfs *buf) { @@ -39,6 +41,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = NAME_MAX; return 0; } +EXPORT_SYMBOL(simple_statfs); /* * Retaining negative dentries for an in-memory filesystem just wastes @@ -66,6 +69,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned d_add(dentry, NULL); return NULL; } +EXPORT_SYMBOL(simple_lookup); int dcache_dir_open(struct inode *inode, struct file *file) { @@ -75,12 +79,14 @@ int dcache_dir_open(struct inode *inode, struct file *file) return file->private_data ? 0 : -ENOMEM; } +EXPORT_SYMBOL(dcache_dir_open); int dcache_dir_close(struct inode *inode, struct file *file) { dput(file->private_data); return 0; } +EXPORT_SYMBOL(dcache_dir_close); loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) { @@ -123,6 +129,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) mutex_unlock(&dentry->d_inode->i_mutex); return offset; } +EXPORT_SYMBOL(dcache_dir_lseek); /* Relationship between i_mode and the DT_xxx types */ static inline unsigned char dt_type(struct inode *inode) @@ -172,11 +179,13 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) spin_unlock(&dentry->d_lock); return 0; } +EXPORT_SYMBOL(dcache_readdir); ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) { return -EISDIR; } +EXPORT_SYMBOL(generic_read_dir); const struct file_operations simple_dir_operations = { .open = dcache_dir_open, @@ -186,10 +195,12 @@ const struct file_operations simple_dir_operations = { .iterate = dcache_readdir, .fsync = noop_fsync, }; +EXPORT_SYMBOL(simple_dir_operations); const struct inode_operations simple_dir_inode_operations = { .lookup = simple_lookup, }; +EXPORT_SYMBOL(simple_dir_inode_operations); static const struct super_operations simple_super_operations = { .statfs = simple_statfs, @@ -244,6 +255,7 @@ Enomem: deactivate_locked_super(s); return ERR_PTR(-ENOMEM); } +EXPORT_SYMBOL(mount_pseudo); int simple_open(struct inode *inode, struct file *file) { @@ -251,6 +263,7 @@ int simple_open(struct inode *inode, struct file *file) file->private_data = inode->i_private; return 0; } +EXPORT_SYMBOL(simple_open); int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { @@ -263,6 +276,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den d_instantiate(dentry, inode); return 0; } +EXPORT_SYMBOL(simple_link); int simple_empty(struct dentry *dentry) { @@ -283,6 +297,7 @@ out: spin_unlock(&dentry->d_lock); return ret; } +EXPORT_SYMBOL(simple_empty); int simple_unlink(struct inode *dir, struct dentry *dentry) { @@ -293,6 +308,7 @@ int simple_unlink(struct inode *dir, struct dentry *dentry) dput(dentry); return 0; } +EXPORT_SYMBOL(simple_unlink); int simple_rmdir(struct inode *dir, struct dentry *dentry) { @@ -304,6 +320,7 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry) drop_nlink(dir); return 0; } +EXPORT_SYMBOL(simple_rmdir); int simple_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) @@ -330,6 +347,7 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, return 0; } +EXPORT_SYMBOL(simple_rename); /** * simple_setattr - setattr for simple filesystem @@ -370,6 +388,7 @@ int simple_readpage(struct file *file, struct page *page) unlock_page(page); return 0; } +EXPORT_SYMBOL(simple_readpage); int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, @@ -393,6 +412,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping, } return 0; } +EXPORT_SYMBOL(simple_write_begin); /** * simple_write_end - .write_end helper for non-block-device FSes @@ -444,6 +464,7 @@ int simple_write_end(struct file *file, struct address_space *mapping, return copied; } +EXPORT_SYMBOL(simple_write_end); /* * the inodes created here are not hashed. If you use iunique to generate @@ -512,6 +533,7 @@ out: dput(root); return -ENOMEM; } +EXPORT_SYMBOL(simple_fill_super); static DEFINE_SPINLOCK(pin_fs_lock); @@ -534,6 +556,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c mntput(mnt); return 0; } +EXPORT_SYMBOL(simple_pin_fs); void simple_release_fs(struct vfsmount **mount, int *count) { @@ -545,6 +568,7 @@ void simple_release_fs(struct vfsmount **mount, int *count) spin_unlock(&pin_fs_lock); mntput(mnt); } +EXPORT_SYMBOL(simple_release_fs); /** * simple_read_from_buffer - copy data from the buffer to user space @@ -579,6 +603,7 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, *ppos = pos + count; return count; } +EXPORT_SYMBOL(simple_read_from_buffer); /** * simple_write_to_buffer - copy data from user space to the buffer @@ -613,6 +638,7 @@ ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, *ppos = pos + count; return count; } +EXPORT_SYMBOL(simple_write_to_buffer); /** * memory_read_from_buffer - copy data from the buffer @@ -644,6 +670,7 @@ ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, return count; } +EXPORT_SYMBOL(memory_read_from_buffer); /* * Transaction based IO. @@ -665,6 +692,7 @@ void simple_transaction_set(struct file *file, size_t n) smp_mb(); ar->size = n; } +EXPORT_SYMBOL(simple_transaction_set); char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) { @@ -696,6 +724,7 @@ char *simple_transaction_get(struct file *file, const char __user *buf, size_t s return ar->data; } +EXPORT_SYMBOL(simple_transaction_get); ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) { @@ -705,12 +734,14 @@ ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size return 0; return simple_read_from_buffer(buf, size, pos, ar->data, ar->size); } +EXPORT_SYMBOL(simple_transaction_read); int simple_transaction_release(struct inode *inode, struct file *file) { free_page((unsigned long)file->private_data); return 0; } +EXPORT_SYMBOL(simple_transaction_release); /* Simple attribute files */ @@ -746,12 +777,14 @@ int simple_attr_open(struct inode *inode, struct file *file, return nonseekable_open(inode, file); } +EXPORT_SYMBOL_GPL(simple_attr_open); int simple_attr_release(struct inode *inode, struct file *file) { kfree(file->private_data); return 0; } +EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only? This? Really? */ /* read from the buffer that is filled with the get function */ ssize_t simple_attr_read(struct file *file, char __user *buf, @@ -787,6 +820,7 @@ out: mutex_unlock(&attr->mutex); return ret; } +EXPORT_SYMBOL_GPL(simple_attr_read); /* interpret the buffer as a number to call the set function with */ ssize_t simple_attr_write(struct file *file, const char __user *buf, @@ -819,6 +853,7 @@ out: mutex_unlock(&attr->mutex); return ret; } +EXPORT_SYMBOL_GPL(simple_attr_write); /** * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation @@ -957,39 +992,56 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync) { return 0; } - -EXPORT_SYMBOL(dcache_dir_close); -EXPORT_SYMBOL(dcache_dir_lseek); -EXPORT_SYMBOL(dcache_dir_open); -EXPORT_SYMBOL(dcache_readdir); -EXPORT_SYMBOL(generic_read_dir); -EXPORT_SYMBOL(mount_pseudo); -EXPORT_SYMBOL(simple_write_begin); -EXPORT_SYMBOL(simple_write_end); -EXPORT_SYMBOL(simple_dir_inode_operations); -EXPORT_SYMBOL(simple_dir_operations); -EXPORT_SYMBOL(simple_empty); -EXPORT_SYMBOL(simple_fill_super); -EXPORT_SYMBOL(simple_getattr); -EXPORT_SYMBOL(simple_open); -EXPORT_SYMBOL(simple_link); -EXPORT_SYMBOL(simple_lookup); -EXPORT_SYMBOL(simple_pin_fs); -EXPORT_SYMBOL(simple_readpage); -EXPORT_SYMBOL(simple_release_fs); -EXPORT_SYMBOL(simple_rename); -EXPORT_SYMBOL(simple_rmdir); -EXPORT_SYMBOL(simple_statfs); EXPORT_SYMBOL(noop_fsync); -EXPORT_SYMBOL(simple_unlink); -EXPORT_SYMBOL(simple_read_from_buffer); -EXPORT_SYMBOL(simple_write_to_buffer); -EXPORT_SYMBOL(memory_read_from_buffer); -EXPORT_SYMBOL(simple_transaction_set); -EXPORT_SYMBOL(simple_transaction_get); -EXPORT_SYMBOL(simple_transaction_read); -EXPORT_SYMBOL(simple_transaction_release); -EXPORT_SYMBOL_GPL(simple_attr_open); -EXPORT_SYMBOL_GPL(simple_attr_release); -EXPORT_SYMBOL_GPL(simple_attr_read); -EXPORT_SYMBOL_GPL(simple_attr_write); + +void kfree_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + kfree(s); +} +EXPORT_SYMBOL(kfree_put_link); + +/* + * nop .set_page_dirty method so that people can use .page_mkwrite on + * anon inodes. + */ +static int anon_set_page_dirty(struct page *page) +{ + return 0; +}; + +/* + * A single inode exists for all anon_inode files. Contrary to pipes, + * anon_inode inodes have no associated per-instance data, so we need + * only allocate one of them. + */ +struct inode *alloc_anon_inode(struct super_block *s) +{ + static const struct address_space_operations anon_aops = { + .set_page_dirty = anon_set_page_dirty, + }; + struct inode *inode = new_inode_pseudo(s); + + if (!inode) + return ERR_PTR(-ENOMEM); + + inode->i_ino = get_next_ino(); + inode->i_mapping->a_ops = &anon_aops; + + /* + * Mark the inode dirty from the very beginning, + * that way it will never be moved to the dirty + * list because mark_inode_dirty() will think + * that it already _is_ on the dirty list. + */ + inode->i_state = I_DIRTY; + inode->i_mode = S_IRUSR | S_IWUSR; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_flags |= S_PRIVATE; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + return inode; +} +EXPORT_SYMBOL(alloc_anon_inode); diff --git a/fs/locks.c b/fs/locks.c index b27a3005d78..f99d52bdd05 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -134,7 +134,7 @@ #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) -#define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) +#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) static bool lease_breaking(struct file_lock *fl) { @@ -1292,28 +1292,40 @@ static void time_out_leases(struct inode *inode) } } +static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) +{ + if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) + return false; + return locks_conflict(breaker, lease); +} + /** * __break_lease - revoke all outstanding leases on file * @inode: the inode of the file to return - * @mode: the open mode (read or write) + * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR: + * break all leases + * @type: FL_LEASE: break leases and delegations; FL_DELEG: break + * only delegations * * break_lease (inlined for speed) has checked there already is at least * some kind of lock (maybe a lease) on this file. Leases are broken on * a call to open() or truncate(). This function can sleep unless you * specified %O_NONBLOCK to your open(). */ -int __break_lease(struct inode *inode, unsigned int mode) +int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { int error = 0; struct file_lock *new_fl, *flock; struct file_lock *fl; unsigned long break_time; int i_have_this_lease = 0; + bool lease_conflict = false; int want_write = (mode & O_ACCMODE) != O_RDONLY; new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); if (IS_ERR(new_fl)) return PTR_ERR(new_fl); + new_fl->fl_flags = type; spin_lock(&inode->i_lock); @@ -1323,13 +1335,16 @@ int __break_lease(struct inode *inode, unsigned int mode) if ((flock == NULL) || !IS_LEASE(flock)) goto out; - if (!locks_conflict(flock, new_fl)) + for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { + if (leases_conflict(fl, new_fl)) { + lease_conflict = true; + if (fl->fl_owner == current->files) + i_have_this_lease = 1; + } + } + if (!lease_conflict) goto out; - for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) - if (fl->fl_owner == current->files) - i_have_this_lease = 1; - break_time = 0; if (lease_break_time > 0) { break_time = jiffies + lease_break_time * HZ; @@ -1338,6 +1353,8 @@ int __break_lease(struct inode *inode, unsigned int mode) } for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { + if (!leases_conflict(fl, new_fl)) + continue; if (want_write) { if (fl->fl_flags & FL_UNLOCK_PENDING) continue; @@ -1379,7 +1396,7 @@ restart: */ for (flock = inode->i_flock; flock && IS_LEASE(flock); flock = flock->fl_next) { - if (locks_conflict(new_fl, flock)) + if (leases_conflict(new_fl, flock)) goto restart; } error = 0; @@ -1460,9 +1477,26 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp struct file_lock *fl, **before, **my_before = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; + bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; lease = *flp; + /* + * In the delegation case we need mutual exclusion with + * a number of operations that take the i_mutex. We trylock + * because delegations are an optional optimization, and if + * there's some chance of a conflict--we'd rather not + * bother, maybe that's a sign this just isn't a good file to + * hand out a delegation on. + */ + if (is_deleg && !mutex_trylock(&inode->i_mutex)) + return -EAGAIN; + + if (is_deleg && arg == F_WRLCK) { + /* Write delegations are not currently supported: */ + WARN_ON_ONCE(1); + return -EINVAL; + } error = -EAGAIN; if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) @@ -1514,9 +1548,10 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp goto out; locks_insert_lock(before, lease); - return 0; - + error = 0; out: + if (is_deleg) + mutex_unlock(&inode->i_mutex); return error; } @@ -1579,7 +1614,7 @@ EXPORT_SYMBOL(generic_setlease); static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) { - if (filp->f_op && filp->f_op->setlease) + if (filp->f_op->setlease) return filp->f_op->setlease(filp, arg, lease); else return generic_setlease(filp, arg, lease); @@ -1771,7 +1806,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) if (error) goto out_free; - if (f.file->f_op && f.file->f_op->flock) + if (f.file->f_op->flock) error = f.file->f_op->flock(f.file, (can_sleep) ? F_SETLKW : F_SETLK, lock); @@ -1797,7 +1832,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) */ int vfs_test_lock(struct file *filp, struct file_lock *fl) { - if (filp->f_op && filp->f_op->lock) + if (filp->f_op->lock) return filp->f_op->lock(filp, F_GETLK, fl); posix_test_lock(filp, fl); return 0; @@ -1909,7 +1944,7 @@ out: */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) { - if (filp->f_op && filp->f_op->lock) + if (filp->f_op->lock) return filp->f_op->lock(filp, cmd, fl); else return posix_lock_file(filp, fl, conf); @@ -2182,7 +2217,7 @@ void locks_remove_flock(struct file *filp) if (!inode->i_flock) return; - if (filp->f_op && filp->f_op->flock) { + if (filp->f_op->flock) { struct file_lock fl = { .fl_pid = current->tgid, .fl_file = filp, @@ -2246,7 +2281,7 @@ EXPORT_SYMBOL(posix_unblock_lock); */ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) { - if (filp->f_op && filp->f_op->lock) + if (filp->f_op->lock) return filp->f_op->lock(filp, F_CANCELLK, fl); return 0; } diff --git a/fs/mount.h b/fs/mount.h index 64a858143ff..d64c594be6c 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -29,6 +29,7 @@ struct mount { struct mount *mnt_parent; struct dentry *mnt_mountpoint; struct vfsmount mnt; + struct rcu_head mnt_rcu; #ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp; #else @@ -55,7 +56,7 @@ struct mount { int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; - int mnt_ghosts; + struct path mnt_ex_mountpoint; }; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ @@ -76,13 +77,28 @@ static inline int is_mounted(struct vfsmount *mnt) return !IS_ERR_OR_NULL(real_mount(mnt)); } -extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int); +extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); +extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *); + +extern bool legitimize_mnt(struct vfsmount *, unsigned); static inline void get_mnt_ns(struct mnt_namespace *ns) { atomic_inc(&ns->count); } +extern seqlock_t mount_lock; + +static inline void lock_mount_hash(void) +{ + write_seqlock(&mount_lock); +} + +static inline void unlock_mount_hash(void) +{ + write_sequnlock(&mount_lock); +} + struct proc_mounts { struct seq_file m; struct mnt_namespace *ns; diff --git a/fs/namei.c b/fs/namei.c index caa28051e19..e029a4cbff7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -482,18 +482,6 @@ EXPORT_SYMBOL(path_put); * to restart the path walk from the beginning in ref-walk mode. */ -static inline void lock_rcu_walk(void) -{ - br_read_lock(&vfsmount_lock); - rcu_read_lock(); -} - -static inline void unlock_rcu_walk(void) -{ - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); -} - /** * unlazy_walk - try to switch to ref-walk mode. * @nd: nameidata pathwalk data @@ -512,26 +500,23 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) BUG_ON(!(nd->flags & LOOKUP_RCU)); /* - * Get a reference to the parent first: we're - * going to make "path_put(nd->path)" valid in - * non-RCU context for "terminate_walk()". - * - * If this doesn't work, return immediately with - * RCU walking still active (and then we will do - * the RCU walk cleanup in terminate_walk()). + * After legitimizing the bastards, terminate_walk() + * will do the right thing for non-RCU mode, and all our + * subsequent exit cases should rcu_read_unlock() + * before returning. Do vfsmount first; if dentry + * can't be legitimized, just set nd->path.dentry to NULL + * and rely on dput(NULL) being a no-op. */ - if (!lockref_get_not_dead(&parent->d_lockref)) + if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) return -ECHILD; - - /* - * After the mntget(), we terminate_walk() will do - * the right thing for non-RCU mode, and all our - * subsequent exit cases should unlock_rcu_walk() - * before returning. - */ - mntget(nd->path.mnt); nd->flags &= ~LOOKUP_RCU; + if (!lockref_get_not_dead(&parent->d_lockref)) { + nd->path.dentry = NULL; + rcu_read_unlock(); + return -ECHILD; + } + /* * For a negative lookup, the lookup sequence point is the parents * sequence point, and it only needs to revalidate the parent dentry. @@ -566,17 +551,17 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) spin_unlock(&fs->lock); } - unlock_rcu_walk(); + rcu_read_unlock(); return 0; unlock_and_drop_dentry: spin_unlock(&fs->lock); drop_dentry: - unlock_rcu_walk(); + rcu_read_unlock(); dput(dentry); goto drop_root_mnt; out: - unlock_rcu_walk(); + rcu_read_unlock(); drop_root_mnt: if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; @@ -608,17 +593,22 @@ static int complete_walk(struct nameidata *nd) if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; + if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) { + rcu_read_unlock(); + return -ECHILD; + } if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) { - unlock_rcu_walk(); + rcu_read_unlock(); + mntput(nd->path.mnt); return -ECHILD; } if (read_seqcount_retry(&dentry->d_seq, nd->seq)) { - unlock_rcu_walk(); + rcu_read_unlock(); dput(dentry); + mntput(nd->path.mnt); return -ECHILD; } - mntget(nd->path.mnt); - unlock_rcu_walk(); + rcu_read_unlock(); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -909,15 +899,15 @@ int follow_up(struct path *path) struct mount *parent; struct dentry *mountpoint; - br_read_lock(&vfsmount_lock); + read_seqlock_excl(&mount_lock); parent = mnt->mnt_parent; if (parent == mnt) { - br_read_unlock(&vfsmount_lock); + read_sequnlock_excl(&mount_lock); return 0; } mntget(&parent->mnt); mountpoint = dget(mnt->mnt_mountpoint); - br_read_unlock(&vfsmount_lock); + read_sequnlock_excl(&mount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -1048,8 +1038,8 @@ static int follow_managed(struct path *path, unsigned flags) /* Something is mounted on this dentry in another * namespace and/or whatever was mounted there in this - * namespace got unmounted before we managed to get the - * vfsmount_lock */ + * namespace got unmounted before lookup_mnt() could + * get it */ } /* Handle an automount point */ @@ -1111,7 +1101,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, if (!d_mountpoint(path->dentry)) break; - mounted = __lookup_mnt(path->mnt, path->dentry, 1); + mounted = __lookup_mnt(path->mnt, path->dentry); if (!mounted) break; path->mnt = &mounted->mnt; @@ -1132,7 +1122,7 @@ static void follow_mount_rcu(struct nameidata *nd) { while (d_mountpoint(nd->path.dentry)) { struct mount *mounted; - mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1); + mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); if (!mounted) break; nd->path.mnt = &mounted->mnt; @@ -1174,7 +1164,7 @@ failed: nd->flags &= ~LOOKUP_RCU; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - unlock_rcu_walk(); + rcu_read_unlock(); return -ECHILD; } @@ -1308,8 +1298,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, } /* - * Call i_op->lookup on the dentry. The dentry must be negative but may be - * hashed if it was pouplated with DCACHE_NEED_LOOKUP. + * Call i_op->lookup on the dentry. The dentry must be negative and + * unhashed. * * dir->d_inode->i_mutex must be held */ @@ -1501,7 +1491,7 @@ static void terminate_walk(struct nameidata *nd) nd->flags &= ~LOOKUP_RCU; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - unlock_rcu_walk(); + rcu_read_unlock(); } } @@ -1511,18 +1501,9 @@ static void terminate_walk(struct nameidata *nd) * so we keep a cache of "no, this doesn't need follow_link" * for the common case. */ -static inline int should_follow_link(struct inode *inode, int follow) +static inline int should_follow_link(struct dentry *dentry, int follow) { - if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { - if (likely(inode->i_op->follow_link)) - return follow; - - /* This gets set once for the inode lifetime */ - spin_lock(&inode->i_lock); - inode->i_opflags |= IOP_NOFOLLOW; - spin_unlock(&inode->i_lock); - } - return 0; + return unlikely(d_is_symlink(dentry)) ? follow : 0; } static inline int walk_component(struct nameidata *nd, struct path *path, @@ -1552,7 +1533,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, if (!inode) goto out_path_put; - if (should_follow_link(inode, follow)) { + if (should_follow_link(path->dentry, follow)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { err = -ECHILD; @@ -1611,26 +1592,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) } /* - * We really don't want to look at inode->i_op->lookup - * when we don't have to. So we keep a cache bit in - * the inode ->i_opflags field that says "yes, we can - * do lookup on this inode". - */ -static inline int can_lookup(struct inode *inode) -{ - if (likely(inode->i_opflags & IOP_LOOKUP)) - return 1; - if (likely(!inode->i_op->lookup)) - return 0; - - /* We do this once for the lifetime of the inode */ - spin_lock(&inode->i_lock); - inode->i_opflags |= IOP_LOOKUP; - spin_unlock(&inode->i_lock); - return 1; -} - -/* * We can do the critical dentry name comparison and hashing * operations one word at a time, but we are limited to: * @@ -1833,7 +1794,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (err) return err; } - if (!can_lookup(nd->inode)) { + if (!d_is_directory(nd->path.dentry)) { err = -ENOTDIR; break; } @@ -1851,9 +1812,10 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->flags = flags | LOOKUP_JUMPED; nd->depth = 0; if (flags & LOOKUP_ROOT) { - struct inode *inode = nd->root.dentry->d_inode; + struct dentry *root = nd->root.dentry; + struct inode *inode = root->d_inode; if (*name) { - if (!can_lookup(inode)) + if (!d_is_directory(root)) return -ENOTDIR; retval = inode_permission(inode, MAY_EXEC); if (retval) @@ -1862,8 +1824,9 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - lock_rcu_walk(); + rcu_read_lock(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); + nd->m_seq = read_seqbegin(&mount_lock); } else { path_get(&nd->path); } @@ -1872,9 +1835,10 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->root.mnt = NULL; + nd->m_seq = read_seqbegin(&mount_lock); if (*name=='/') { if (flags & LOOKUP_RCU) { - lock_rcu_walk(); + rcu_read_lock(); set_root_rcu(nd); } else { set_root(nd); @@ -1886,7 +1850,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct fs_struct *fs = current->fs; unsigned seq; - lock_rcu_walk(); + rcu_read_lock(); do { seq = read_seqcount_begin(&fs->seq); @@ -1907,7 +1871,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, dentry = f.file->f_path.dentry; if (*name) { - if (!can_lookup(dentry->d_inode)) { + if (!d_is_directory(dentry)) { fdput(f); return -ENOTDIR; } @@ -1918,7 +1882,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (f.need_put) *fp = f.file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - lock_rcu_walk(); + rcu_read_lock(); } else { path_get(&nd->path); fdput(f); @@ -1989,7 +1953,7 @@ static int path_lookupat(int dfd, const char *name, err = complete_walk(nd); if (!err && nd->flags & LOOKUP_DIRECTORY) { - if (!can_lookup(nd->inode)) { + if (!d_is_directory(nd->path.dentry)) { path_put(&nd->path); err = -ENOTDIR; } @@ -2281,7 +2245,7 @@ done: } path->dentry = dentry; path->mnt = mntget(nd->path.mnt); - if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW)) + if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW)) return 1; follow_mount(path); error = 0; @@ -2426,12 +2390,14 @@ static inline int check_sticky(struct inode *dir, struct inode *inode) * 10. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ -static int may_delete(struct inode *dir,struct dentry *victim,int isdir) +static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) { + struct inode *inode = victim->d_inode; int error; - if (!victim->d_inode) + if (d_is_negative(victim)) return -ENOENT; + BUG_ON(!inode); BUG_ON(victim->d_parent->d_inode != dir); audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); @@ -2441,15 +2407,16 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) return error; if (IS_APPEND(dir)) return -EPERM; - if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| - IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) + + if (check_sticky(dir, inode) || IS_APPEND(inode) || + IS_IMMUTABLE(inode) || IS_SWAPFILE(inode)) return -EPERM; if (isdir) { - if (!S_ISDIR(victim->d_inode->i_mode)) + if (!d_is_directory(victim) && !d_is_autodir(victim)) return -ENOTDIR; if (IS_ROOT(victim)) return -EBUSY; - } else if (S_ISDIR(victim->d_inode->i_mode)) + } else if (d_is_directory(victim) || d_is_autodir(victim)) return -EISDIR; if (IS_DEADDIR(dir)) return -ENOENT; @@ -2983,7 +2950,7 @@ retry_lookup: /* * create/update audit record if it already exists. */ - if (path->dentry->d_inode) + if (d_is_positive(path->dentry)) audit_inode(name, path->dentry, 0); /* @@ -3012,12 +2979,12 @@ retry_lookup: finish_lookup: /* we _can_ be in RCU mode here */ error = -ENOENT; - if (!inode) { + if (d_is_negative(path->dentry)) { path_to_nameidata(path, nd); goto out; } - if (should_follow_link(inode, !symlink_ok)) { + if (should_follow_link(path->dentry, !symlink_ok)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { error = -ECHILD; @@ -3046,10 +3013,11 @@ finish_open: } audit_inode(name, nd->path.dentry, 0); error = -EISDIR; - if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) + if ((open_flag & O_CREAT) && + (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry))) goto out; error = -ENOTDIR; - if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode)) + if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry)) goto out; if (!S_ISREG(nd->inode->i_mode)) will_truncate = false; @@ -3275,7 +3243,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, nd.root.mnt = mnt; nd.root.dentry = dentry; - if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) + if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN) return ERR_PTR(-ELOOP); file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU); @@ -3325,8 +3293,9 @@ struct dentry *kern_path_create(int dfd, const char *pathname, goto unlock; error = -EEXIST; - if (dentry->d_inode) + if (d_is_positive(dentry)) goto fail; + /* * Special case - lookup gave negative, but... we had foo/bar/ * From the vfs_mknod() POV we just have a negative dentry - @@ -3647,8 +3616,27 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) return do_rmdir(AT_FDCWD, pathname); } -int vfs_unlink(struct inode *dir, struct dentry *dentry) +/** + * vfs_unlink - unlink a filesystem object + * @dir: parent directory + * @dentry: victim + * @delegated_inode: returns victim inode, if the inode is delegated. + * + * The caller must hold dir->i_mutex. + * + * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and + * return a reference to the inode in delegated_inode. The caller + * should then break the delegation on that inode and retry. Because + * breaking a delegation may take a long time, the caller should drop + * dir->i_mutex before doing so. + * + * Alternatively, a caller may pass NULL for delegated_inode. This may + * be appropriate for callers that expect the underlying filesystem not + * to be NFS exported. + */ +int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) { + struct inode *target = dentry->d_inode; int error = may_delete(dir, dentry, 0); if (error) @@ -3657,22 +3645,26 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) if (!dir->i_op->unlink) return -EPERM; - mutex_lock(&dentry->d_inode->i_mutex); + mutex_lock(&target->i_mutex); if (d_mountpoint(dentry)) error = -EBUSY; else { error = security_inode_unlink(dir, dentry); if (!error) { + error = try_break_deleg(target, delegated_inode); + if (error) + goto out; error = dir->i_op->unlink(dir, dentry); if (!error) dont_mount(dentry); } } - mutex_unlock(&dentry->d_inode->i_mutex); +out: + mutex_unlock(&target->i_mutex); /* We don't d_delete() NFS sillyrenamed files--they still exist. */ if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { - fsnotify_link_count(dentry->d_inode); + fsnotify_link_count(target); d_delete(dentry); } @@ -3692,6 +3684,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) struct dentry *dentry; struct nameidata nd; struct inode *inode = NULL; + struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0; retry: name = user_path_parent(dfd, pathname, &nd, lookup_flags); @@ -3706,7 +3699,7 @@ retry: error = mnt_want_write(nd.path.mnt); if (error) goto exit1; - +retry_deleg: mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); @@ -3715,19 +3708,25 @@ retry: if (nd.last.name[nd.last.len]) goto slashes; inode = dentry->d_inode; - if (!inode) + if (d_is_negative(dentry)) goto slashes; ihold(inode); error = security_path_unlink(&nd.path, dentry); if (error) goto exit2; - error = vfs_unlink(nd.path.dentry->d_inode, dentry); + error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode); exit2: dput(dentry); } mutex_unlock(&nd.path.dentry->d_inode->i_mutex); if (inode) iput(inode); /* truncate the inode here */ + inode = NULL; + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } mnt_drop_write(nd.path.mnt); exit1: path_put(&nd.path); @@ -3740,8 +3739,12 @@ exit1: return error; slashes: - error = !dentry->d_inode ? -ENOENT : - S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; + if (d_is_negative(dentry)) + error = -ENOENT; + else if (d_is_directory(dentry) || d_is_autodir(dentry)) + error = -EISDIR; + else + error = -ENOTDIR; goto exit2; } @@ -3817,7 +3820,26 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn return sys_symlinkat(oldname, AT_FDCWD, newname); } -int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) +/** + * vfs_link - create a new link + * @old_dentry: object to be linked + * @dir: new parent + * @new_dentry: where to create the new link + * @delegated_inode: returns inode needing a delegation break + * + * The caller must hold dir->i_mutex + * + * If vfs_link discovers a delegation on the to-be-linked file in need + * of breaking, it will return -EWOULDBLOCK and return a reference to the + * inode in delegated_inode. The caller should then break the delegation + * and retry. Because breaking a delegation may take a long time, the + * caller should drop the i_mutex before doing so. + * + * Alternatively, a caller may pass NULL for delegated_inode. This may + * be appropriate for callers that expect the underlying filesystem not + * to be NFS exported. + */ +int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) { struct inode *inode = old_dentry->d_inode; unsigned max_links = dir->i_sb->s_max_links; @@ -3853,8 +3875,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de error = -ENOENT; else if (max_links && inode->i_nlink >= max_links) error = -EMLINK; - else - error = dir->i_op->link(old_dentry, dir, new_dentry); + else { + error = try_break_deleg(inode, delegated_inode); + if (!error) + error = dir->i_op->link(old_dentry, dir, new_dentry); + } if (!error && (inode->i_state & I_LINKABLE)) { spin_lock(&inode->i_lock); @@ -3881,6 +3906,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, { struct dentry *new_dentry; struct path old_path, new_path; + struct inode *delegated_inode = NULL; int how = 0; int error; @@ -3919,9 +3945,14 @@ retry: error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) goto out_dput; - error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); + error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode); out_dput: done_path_create(&new_path, new_dentry); + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry; + } if (retry_estale(error, how)) { how |= LOOKUP_REVAL; goto retry; @@ -3946,7 +3977,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * That's where 4.4 screws up. Current fix: serialization on * sb->s_vfs_rename_mutex. We might be more accurate, but that's another * story. - * c) we have to lock _three_ objects - parents and victim (if it exists). + * c) we have to lock _four_ objects - parents and victim (if it exists), + * and source (if it is not a directory). * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change @@ -4019,9 +4051,11 @@ out: } static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) + struct inode *new_dir, struct dentry *new_dentry, + struct inode **delegated_inode) { struct inode *target = new_dentry->d_inode; + struct inode *source = old_dentry->d_inode; int error; error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); @@ -4029,13 +4063,20 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, return error; dget(new_dentry); - if (target) - mutex_lock(&target->i_mutex); + lock_two_nondirectories(source, target); error = -EBUSY; if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) goto out; + error = try_break_deleg(source, delegated_inode); + if (error) + goto out; + if (target) { + error = try_break_deleg(target, delegated_inode); + if (error) + goto out; + } error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (error) goto out; @@ -4045,17 +4086,38 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) d_move(old_dentry, new_dentry); out: - if (target) - mutex_unlock(&target->i_mutex); + unlock_two_nondirectories(source, target); dput(new_dentry); return error; } +/** + * vfs_rename - rename a filesystem object + * @old_dir: parent of source + * @old_dentry: source + * @new_dir: parent of destination + * @new_dentry: destination + * @delegated_inode: returns an inode needing a delegation break + * + * The caller must hold multiple mutexes--see lock_rename()). + * + * If vfs_rename discovers a delegation in need of breaking at either + * the source or destination, it will return -EWOULDBLOCK and return a + * reference to the inode in delegated_inode. The caller should then + * break the delegation and retry. Because breaking a delegation may + * take a long time, the caller should drop all locks before doing + * so. + * + * Alternatively, a caller may pass NULL for delegated_inode. This may + * be appropriate for callers that expect the underlying filesystem not + * to be NFS exported. + */ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) + struct inode *new_dir, struct dentry *new_dentry, + struct inode **delegated_inode) { int error; - int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); + int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry); const unsigned char *old_name; if (old_dentry->d_inode == new_dentry->d_inode) @@ -4080,7 +4142,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (is_dir) error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); else - error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); + error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode); if (!error) fsnotify_move(old_dir, new_dir, old_name, is_dir, new_dentry->d_inode, old_dentry); @@ -4096,6 +4158,7 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, struct dentry *old_dentry, *new_dentry; struct dentry *trap; struct nameidata oldnd, newnd; + struct inode *delegated_inode = NULL; struct filename *from; struct filename *to; unsigned int lookup_flags = 0; @@ -4135,6 +4198,7 @@ retry: newnd.flags &= ~LOOKUP_PARENT; newnd.flags |= LOOKUP_RENAME_TARGET; +retry_deleg: trap = lock_rename(new_dir, old_dir); old_dentry = lookup_hash(&oldnd); @@ -4143,10 +4207,10 @@ retry: goto exit3; /* source must exist */ error = -ENOENT; - if (!old_dentry->d_inode) + if (d_is_negative(old_dentry)) goto exit4; /* unless the source is a directory trailing slashes give -ENOTDIR */ - if (!S_ISDIR(old_dentry->d_inode->i_mode)) { + if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) { error = -ENOTDIR; if (oldnd.last.name[oldnd.last.len]) goto exit4; @@ -4171,13 +4235,19 @@ retry: if (error) goto exit5; error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry); + new_dir->d_inode, new_dentry, + &delegated_inode); exit5: dput(new_dentry); exit4: dput(old_dentry); exit3: unlock_rename(new_dir, old_dir); + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } mnt_drop_write(oldnd.path.mnt); exit2: if (retry_estale(error, lookup_flags)) diff --git a/fs/namespace.c b/fs/namespace.c index da5c4948343..ac2ce8a766e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -39,7 +39,7 @@ static int mnt_group_start = 1; static struct list_head *mount_hashtable __read_mostly; static struct list_head *mountpoint_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; -static struct rw_semaphore namespace_sem; +static DECLARE_RWSEM(namespace_sem); /* /sys/fs */ struct kobject *fs_kobj; @@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(fs_kobj); * It should be taken for write in all cases where the vfsmount * tree or hash is modified or when a vfsmount structure is modified. */ -DEFINE_BRLOCK(vfsmount_lock); +__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { @@ -63,8 +63,6 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) return tmp & (HASH_SIZE - 1); } -#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) - /* * allocation is serialized by namespace_sem, but we need the spinlock to * serialize with freeing. @@ -458,7 +456,7 @@ static int mnt_make_readonly(struct mount *mnt) { int ret = 0; - br_write_lock(&vfsmount_lock); + lock_mount_hash(); mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. This store @@ -492,15 +490,15 @@ static int mnt_make_readonly(struct mount *mnt) */ smp_wmb(); mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); return ret; } static void __mnt_unmake_readonly(struct mount *mnt) { - br_write_lock(&vfsmount_lock); + lock_mount_hash(); mnt->mnt.mnt_flags &= ~MNT_READONLY; - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } int sb_prepare_remount_readonly(struct super_block *sb) @@ -512,7 +510,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (atomic_long_read(&sb->s_remove_count)) return -EBUSY; - br_write_lock(&vfsmount_lock); + lock_mount_hash(); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; @@ -534,7 +532,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); return err; } @@ -549,30 +547,56 @@ static void free_vfsmnt(struct mount *mnt) kmem_cache_free(mnt_cache, mnt); } +/* call under rcu_read_lock */ +bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) +{ + struct mount *mnt; + if (read_seqretry(&mount_lock, seq)) + return false; + if (bastard == NULL) + return true; + mnt = real_mount(bastard); + mnt_add_count(mnt, 1); + if (likely(!read_seqretry(&mount_lock, seq))) + return true; + if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { + mnt_add_count(mnt, -1); + return false; + } + rcu_read_unlock(); + mntput(bastard); + rcu_read_lock(); + return false; +} + /* - * find the first or last mount at @dentry on vfsmount @mnt depending on - * @dir. If @dir is set return the first mount else return the last mount. - * vfsmount_lock must be held for read or write. + * find the first mount at @dentry on vfsmount @mnt. + * call under rcu_read_lock() */ -struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, - int dir) +struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) { struct list_head *head = mount_hashtable + hash(mnt, dentry); - struct list_head *tmp = head; - struct mount *p, *found = NULL; + struct mount *p; - for (;;) { - tmp = dir ? tmp->next : tmp->prev; - p = NULL; - if (tmp == head) - break; - p = list_entry(tmp, struct mount, mnt_hash); - if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) { - found = p; - break; - } - } - return found; + list_for_each_entry_rcu(p, head, mnt_hash) + if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) + return p; + return NULL; +} + +/* + * find the last mount at @dentry on vfsmount @mnt. + * mount_lock must be held. + */ +struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) +{ + struct list_head *head = mount_hashtable + hash(mnt, dentry); + struct mount *p; + + list_for_each_entry_reverse(p, head, mnt_hash) + if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) + return p; + return NULL; } /* @@ -594,17 +618,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *lookup_mnt(struct path *path) { struct mount *child_mnt; + struct vfsmount *m; + unsigned seq; - br_read_lock(&vfsmount_lock); - child_mnt = __lookup_mnt(path->mnt, path->dentry, 1); - if (child_mnt) { - mnt_add_count(child_mnt, 1); - br_read_unlock(&vfsmount_lock); - return &child_mnt->mnt; - } else { - br_read_unlock(&vfsmount_lock); - return NULL; - } + rcu_read_lock(); + do { + seq = read_seqbegin(&mount_lock); + child_mnt = __lookup_mnt(path->mnt, path->dentry); + m = child_mnt ? &child_mnt->mnt : NULL; + } while (!legitimize_mnt(m, seq)); + rcu_read_unlock(); + return m; } static struct mountpoint *new_mountpoint(struct dentry *dentry) @@ -796,9 +820,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void mnt->mnt.mnt_sb = root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(&vfsmount_lock); + lock_mount_hash(); list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); return &mnt->mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); @@ -839,9 +863,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, mnt->mnt.mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(&vfsmount_lock); + lock_mount_hash(); list_add_tail(&mnt->mnt_instance, &sb->s_mounts); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); if ((flag & CL_SLAVE) || ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) { @@ -872,64 +896,66 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, return ERR_PTR(err); } -static inline void mntfree(struct mount *mnt) +static void delayed_free(struct rcu_head *head) { - struct vfsmount *m = &mnt->mnt; - struct super_block *sb = m->mnt_sb; - - /* - * This probably indicates that somebody messed - * up a mnt_want/drop_write() pair. If this - * happens, the filesystem was probably unable - * to make r/w->r/o transitions. - */ - /* - * The locking used to deal with mnt_count decrement provides barriers, - * so mnt_get_writers() below is safe. - */ - WARN_ON(mnt_get_writers(mnt)); - fsnotify_vfsmount_delete(m); - dput(m->mnt_root); - free_vfsmnt(mnt); - deactivate_super(sb); + struct mount *mnt = container_of(head, struct mount, mnt_rcu); + kfree(mnt->mnt_devname); +#ifdef CONFIG_SMP + free_percpu(mnt->mnt_pcp); +#endif + kmem_cache_free(mnt_cache, mnt); } static void mntput_no_expire(struct mount *mnt) { put_again: -#ifdef CONFIG_SMP - br_read_lock(&vfsmount_lock); - if (likely(mnt->mnt_ns)) { - /* shouldn't be the last one */ - mnt_add_count(mnt, -1); - br_read_unlock(&vfsmount_lock); + rcu_read_lock(); + mnt_add_count(mnt, -1); + if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ + rcu_read_unlock(); return; } - br_read_unlock(&vfsmount_lock); - - br_write_lock(&vfsmount_lock); - mnt_add_count(mnt, -1); + lock_mount_hash(); if (mnt_get_count(mnt)) { - br_write_unlock(&vfsmount_lock); + rcu_read_unlock(); + unlock_mount_hash(); return; } -#else - mnt_add_count(mnt, -1); - if (likely(mnt_get_count(mnt))) - return; - br_write_lock(&vfsmount_lock); -#endif if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; - br_write_unlock(&vfsmount_lock); + rcu_read_unlock(); + unlock_mount_hash(); acct_auto_close_mnt(&mnt->mnt); goto put_again; } + if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { + rcu_read_unlock(); + unlock_mount_hash(); + return; + } + mnt->mnt.mnt_flags |= MNT_DOOMED; + rcu_read_unlock(); list_del(&mnt->mnt_instance); - br_write_unlock(&vfsmount_lock); - mntfree(mnt); + unlock_mount_hash(); + + /* + * This probably indicates that somebody messed + * up a mnt_want/drop_write() pair. If this + * happens, the filesystem was probably unable + * to make r/w->r/o transitions. + */ + /* + * The locking used to deal with mnt_count decrement provides barriers, + * so mnt_get_writers() below is safe. + */ + WARN_ON(mnt_get_writers(mnt)); + fsnotify_vfsmount_delete(&mnt->mnt); + dput(mnt->mnt.mnt_root); + deactivate_super(mnt->mnt.mnt_sb); + mnt_free_id(mnt); + call_rcu(&mnt->mnt_rcu, delayed_free); } void mntput(struct vfsmount *mnt) @@ -954,21 +980,21 @@ EXPORT_SYMBOL(mntget); void mnt_pin(struct vfsmount *mnt) { - br_write_lock(&vfsmount_lock); + lock_mount_hash(); real_mount(mnt)->mnt_pinned++; - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *m) { struct mount *mnt = real_mount(m); - br_write_lock(&vfsmount_lock); + lock_mount_hash(); if (mnt->mnt_pinned) { mnt_add_count(mnt, 1); mnt->mnt_pinned--; } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } EXPORT_SYMBOL(mnt_unpin); @@ -1085,12 +1111,12 @@ int may_umount_tree(struct vfsmount *m) BUG_ON(!m); /* write lock needed for mnt_get_count */ - br_write_lock(&vfsmount_lock); + lock_mount_hash(); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); if (actual_refs > minimum_refs) return 0; @@ -1117,10 +1143,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_write_lock(&vfsmount_lock); + lock_mount_hash(); if (propagate_mount_busy(real_mount(mnt), 2)) ret = 0; - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); up_read(&namespace_sem); return ret; } @@ -1142,23 +1168,13 @@ static void namespace_unlock(void) list_splice_init(&unmounted, &head); up_write(&namespace_sem); + synchronize_rcu(); + while (!list_empty(&head)) { mnt = list_first_entry(&head, struct mount, mnt_hash); list_del_init(&mnt->mnt_hash); - if (mnt_has_parent(mnt)) { - struct dentry *dentry; - struct mount *m; - - br_write_lock(&vfsmount_lock); - dentry = mnt->mnt_mountpoint; - m = mnt->mnt_parent; - mnt->mnt_mountpoint = mnt->mnt.mnt_root; - mnt->mnt_parent = mnt; - m->mnt_ghosts--; - br_write_unlock(&vfsmount_lock); - dput(dentry); - mntput(&m->mnt); - } + if (mnt->mnt_ex_mountpoint.mnt) + path_put(&mnt->mnt_ex_mountpoint); mntput(&mnt->mnt); } } @@ -1169,10 +1185,13 @@ static inline void namespace_lock(void) } /* - * vfsmount lock must be held for write + * mount_lock must be held * namespace_sem must be held for write + * how = 0 => just this tree, don't propagate + * how = 1 => propagate; we know that nobody else has reference to any victims + * how = 2 => lazy umount */ -void umount_tree(struct mount *mnt, int propagate) +void umount_tree(struct mount *mnt, int how) { LIST_HEAD(tmp_list); struct mount *p; @@ -1180,7 +1199,7 @@ void umount_tree(struct mount *mnt, int propagate) for (p = mnt; p; p = next_mnt(p, mnt)) list_move(&p->mnt_hash, &tmp_list); - if (propagate) + if (how) propagate_umount(&tmp_list); list_for_each_entry(p, &tmp_list, mnt_hash) { @@ -1188,10 +1207,16 @@ void umount_tree(struct mount *mnt, int propagate) list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; + if (how < 2) + p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; list_del_init(&p->mnt_child); if (mnt_has_parent(p)) { - p->mnt_parent->mnt_ghosts++; put_mountpoint(p->mnt_mp); + /* move the reference to mountpoint into ->mnt_ex_mountpoint */ + p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint; + p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt; + p->mnt_mountpoint = p->mnt.mnt_root; + p->mnt_parent = p; p->mnt_mp = NULL; } change_mnt_propagation(p, MS_PRIVATE); @@ -1225,12 +1250,12 @@ static int do_umount(struct mount *mnt, int flags) * probably don't strictly need the lock here if we examined * all race cases, but it's a slowpath. */ - br_write_lock(&vfsmount_lock); + lock_mount_hash(); if (mnt_get_count(mnt) != 2) { - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); return -EBUSY; } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1272,19 +1297,23 @@ static int do_umount(struct mount *mnt, int flags) } namespace_lock(); - br_write_lock(&vfsmount_lock); + lock_mount_hash(); event++; - if (!(flags & MNT_DETACH)) - shrink_submounts(mnt); - - retval = -EBUSY; - if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { + if (flags & MNT_DETACH) { if (!list_empty(&mnt->mnt_list)) - umount_tree(mnt, 1); + umount_tree(mnt, 2); retval = 0; + } else { + shrink_submounts(mnt); + retval = -EBUSY; + if (!propagate_mount_busy(mnt, 2)) { + if (!list_empty(&mnt->mnt_list)) + umount_tree(mnt, 1); + retval = 0; + } } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); namespace_unlock(); return retval; } @@ -1427,18 +1456,18 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, q = clone_mnt(p, p->mnt.mnt_root, flag); if (IS_ERR(q)) goto out; - br_write_lock(&vfsmount_lock); + lock_mount_hash(); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, parent, p->mnt_mp); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } } return res; out: if (res) { - br_write_lock(&vfsmount_lock); + lock_mount_hash(); umount_tree(res, 0); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } return q; } @@ -1460,9 +1489,9 @@ struct vfsmount *collect_mounts(struct path *path) void drop_collected_mounts(struct vfsmount *mnt) { namespace_lock(); - br_write_lock(&vfsmount_lock); + lock_mount_hash(); umount_tree(real_mount(mnt), 0); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); namespace_unlock(); } @@ -1589,7 +1618,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, if (err) goto out_cleanup_ids; - br_write_lock(&vfsmount_lock); + lock_mount_hash(); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) @@ -1608,7 +1637,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, list_del_init(&child->mnt_hash); commit_tree(child); } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); return 0; @@ -1710,10 +1739,10 @@ static int do_change_type(struct path *path, int flag) goto out_unlock; } - br_write_lock(&vfsmount_lock); + lock_mount_hash(); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); out_unlock: namespace_unlock(); @@ -1785,9 +1814,9 @@ static int do_loopback(struct path *path, const char *old_name, err = graft_tree(mnt, parent, mp); if (err) { - br_write_lock(&vfsmount_lock); + lock_mount_hash(); umount_tree(mnt, 0); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } out2: unlock_mount(mp); @@ -1846,17 +1875,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - br_write_lock(&vfsmount_lock); + lock_mount_hash(); mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; mnt->mnt.mnt_flags = mnt_flags; - br_write_unlock(&vfsmount_lock); - } - up_write(&sb->s_umount); - if (!err) { - br_write_lock(&vfsmount_lock); touch_mnt_namespace(mnt->mnt_ns); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } + up_write(&sb->s_umount); return err; } @@ -1972,7 +1997,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) struct mount *parent; int err; - mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); + mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT); mp = lock_mount(path); if (IS_ERR(mp)) @@ -2077,9 +2102,7 @@ fail: /* remove m from any expiration list it may be on */ if (!list_empty(&mnt->mnt_expire)) { namespace_lock(); - br_write_lock(&vfsmount_lock); list_del_init(&mnt->mnt_expire); - br_write_unlock(&vfsmount_lock); namespace_unlock(); } mntput(m); @@ -2095,11 +2118,9 @@ fail: void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) { namespace_lock(); - br_write_lock(&vfsmount_lock); list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); - br_write_unlock(&vfsmount_lock); namespace_unlock(); } EXPORT_SYMBOL(mnt_set_expiry); @@ -2118,7 +2139,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) return; namespace_lock(); - br_write_lock(&vfsmount_lock); + lock_mount_hash(); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -2137,7 +2158,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1); } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); namespace_unlock(); } @@ -2193,7 +2214,7 @@ resume: * process a list of expirable mountpoints with the intent of discarding any * submounts of a specific parent mountpoint * - * vfsmount_lock must be held for write + * mount_lock must be held for write */ static void shrink_submounts(struct mount *mnt) { @@ -2414,20 +2435,25 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) return new_ns; } -/* - * Allocate a new namespace structure and populate it with contents - * copied from the namespace of the passed in task structure. - */ -static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, - struct user_namespace *user_ns, struct fs_struct *fs) +struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, + struct user_namespace *user_ns, struct fs_struct *new_fs) { struct mnt_namespace *new_ns; struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; struct mount *p, *q; - struct mount *old = mnt_ns->root; + struct mount *old; struct mount *new; int copy_flags; + BUG_ON(!ns); + + if (likely(!(flags & CLONE_NEWNS))) { + get_mnt_ns(ns); + return ns; + } + + old = ns->root; + new_ns = alloc_mnt_ns(user_ns); if (IS_ERR(new_ns)) return new_ns; @@ -2435,7 +2461,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, namespace_lock(); /* First pass: copy the tree topology */ copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; - if (user_ns != mnt_ns->user_ns) + if (user_ns != ns->user_ns) copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; new = copy_tree(old, old->mnt.mnt_root, copy_flags); if (IS_ERR(new)) { @@ -2444,9 +2470,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, return ERR_CAST(new); } new_ns->root = new; - br_write_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new->mnt_list); - br_write_unlock(&vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2457,13 +2481,13 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, q = new; while (p) { q->mnt_ns = new_ns; - if (fs) { - if (&p->mnt == fs->root.mnt) { - fs->root.mnt = mntget(&q->mnt); + if (new_fs) { + if (&p->mnt == new_fs->root.mnt) { + new_fs->root.mnt = mntget(&q->mnt); rootmnt = &p->mnt; } - if (&p->mnt == fs->pwd.mnt) { - fs->pwd.mnt = mntget(&q->mnt); + if (&p->mnt == new_fs->pwd.mnt) { + new_fs->pwd.mnt = mntget(&q->mnt); pwdmnt = &p->mnt; } } @@ -2484,23 +2508,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, return new_ns; } -struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, - struct user_namespace *user_ns, struct fs_struct *new_fs) -{ - struct mnt_namespace *new_ns; - - BUG_ON(!ns); - get_mnt_ns(ns); - - if (!(flags & CLONE_NEWNS)) - return ns; - - new_ns = dup_mnt_ns(ns, user_ns, new_fs); - - put_mnt_ns(ns); - return new_ns; -} - /** * create_mnt_ns - creates a private namespace and adds a root filesystem * @mnt: pointer to the new root filesystem mountpoint @@ -2593,7 +2600,7 @@ out_type: /* * Return true if path is reachable from root * - * namespace_sem or vfsmount_lock is held + * namespace_sem or mount_lock is held */ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, const struct path *root) @@ -2608,9 +2615,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, int path_is_under(struct path *path1, struct path *path2) { int res; - br_read_lock(&vfsmount_lock); + read_seqlock_excl(&mount_lock); res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); - br_read_unlock(&vfsmount_lock); + read_sequnlock_excl(&mount_lock); return res; } EXPORT_SYMBOL(path_is_under); @@ -2701,7 +2708,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, if (!is_path_reachable(old_mnt, old.dentry, &new)) goto out4; root_mp->m_count++; /* pin it so it won't go away */ - br_write_lock(&vfsmount_lock); + lock_mount_hash(); detach_mnt(new_mnt, &parent_path); detach_mnt(root_mnt, &root_parent); if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { @@ -2713,7 +2720,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); touch_mnt_namespace(current->nsproxy->mnt_ns); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); chroot_fs_refs(&root, &new); put_mountpoint(root_mp); error = 0; @@ -2767,8 +2774,6 @@ void __init mnt_init(void) unsigned u; int err; - init_rwsem(&namespace_sem); - mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); @@ -2785,8 +2790,6 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mountpoint_hashtable[u]); - br_lock_init(&vfsmount_lock); - err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", @@ -2802,11 +2805,7 @@ void put_mnt_ns(struct mnt_namespace *ns) { if (!atomic_dec_and_test(&ns->count)) return; - namespace_lock(); - br_write_lock(&vfsmount_lock); - umount_tree(ns->root, 0); - br_write_unlock(&vfsmount_lock); - namespace_unlock(); + drop_collected_mounts(&ns->root->mnt); free_mnt_ns(ns); } @@ -2829,9 +2828,8 @@ void kern_unmount(struct vfsmount *mnt) { /* release long term mount so mount point can be released */ if (!IS_ERR_OR_NULL(mnt)) { - br_write_lock(&vfsmount_lock); real_mount(mnt)->mnt_ns = NULL; - br_write_unlock(&vfsmount_lock); + synchronize_rcu(); /* yecchhh... */ mntput(mnt); } } @@ -2875,7 +2873,7 @@ bool fs_fully_visible(struct file_system_type *type) if (unlikely(!ns)) return false; - namespace_lock(); + down_read(&namespace_sem); list_for_each_entry(mnt, &ns->list, mnt_list) { struct mount *child; if (mnt->mnt.mnt_sb->s_type != type) @@ -2896,7 +2894,7 @@ bool fs_fully_visible(struct file_system_type *type) next: ; } found: - namespace_unlock(); + up_read(&namespace_sem); return visible; } diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 3be047474bf..c320ac52353 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -339,9 +339,8 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags) if (val) goto finished; - DDPRINTK("ncp_lookup_validate: %s/%s not valid, age=%ld, server lookup\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - NCP_GET_AGE(dentry)); + DDPRINTK("ncp_lookup_validate: %pd2 not valid, age=%ld, server lookup\n", + dentry, NCP_GET_AGE(dentry)); len = sizeof(__name); if (ncp_is_server_root(dir)) { @@ -359,8 +358,8 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags) res = ncp_obtain_info(server, dir, __name, &(finfo.i)); } finfo.volume = finfo.i.volNumber; - DDPRINTK("ncp_lookup_validate: looked for %s/%s, res=%d\n", - dentry->d_parent->d_name.name, __name, res); + DDPRINTK("ncp_lookup_validate: looked for %pd/%s, res=%d\n", + dentry->d_parent, __name, res); /* * If we didn't find it, or if it has a different dirEntNum to * what we remember, it's not valid any more. @@ -454,8 +453,7 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx) ctl.page = NULL; ctl.cache = NULL; - DDPRINTK("ncp_readdir: reading %s/%s, pos=%d\n", - dentry->d_parent->d_name.name, dentry->d_name.name, + DDPRINTK("ncp_readdir: reading %pD2, pos=%d\n", file, (int) ctx->pos); result = -EIO; @@ -740,12 +738,10 @@ ncp_do_readdir(struct file *file, struct dir_context *ctx, int more; size_t bufsize; - DPRINTK("ncp_do_readdir: %s/%s, fpos=%ld\n", - dentry->d_parent->d_name.name, dentry->d_name.name, + DPRINTK("ncp_do_readdir: %pD2, fpos=%ld\n", file, (unsigned long) ctx->pos); - PPRINTK("ncp_do_readdir: init %s, volnum=%d, dirent=%u\n", - dentry->d_name.name, NCP_FINFO(dir)->volNumber, - NCP_FINFO(dir)->dirEntNum); + PPRINTK("ncp_do_readdir: init %pD, volnum=%d, dirent=%u\n", + file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum); err = ncp_initialize_search(server, dir, &seq); if (err) { @@ -850,8 +846,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig if (!ncp_conn_valid(server)) goto finished; - PPRINTK("ncp_lookup: server lookup for %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + PPRINTK("ncp_lookup: server lookup for %pd2\n", dentry); len = sizeof(__name); if (ncp_is_server_root(dir)) { @@ -867,8 +862,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig if (!res) res = ncp_obtain_info(server, dir, __name, &(finfo.i)); } - PPRINTK("ncp_lookup: looked for %s/%s, res=%d\n", - dentry->d_parent->d_name.name, __name, res); + PPRINTK("ncp_lookup: looked for %pd2, res=%d\n", dentry, res); /* * If we didn't find an entry, make a negative dentry. */ @@ -915,8 +909,7 @@ out: return error; out_close: - PPRINTK("ncp_instantiate: %s/%s failed, closing file\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + PPRINTK("ncp_instantiate: %pd2 failed, closing file\n", dentry); ncp_close_file(NCP_SERVER(dir), finfo->file_handle); goto out; } @@ -930,8 +923,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode, int opmode; __u8 __name[NCP_MAXPATHLEN + 1]; - PPRINTK("ncp_create_new: creating %s/%s, mode=%hx\n", - dentry->d_parent->d_name.name, dentry->d_name.name, mode); + PPRINTK("ncp_create_new: creating %pd2, mode=%hx\n", dentry, mode); ncp_age_dentry(server, dentry); len = sizeof(__name); @@ -960,8 +952,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode, error = -ENAMETOOLONG; else if (result < 0) error = result; - DPRINTK("ncp_create: %s/%s failed\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_create: %pd2 failed\n", dentry); goto out; } opmode = O_WRONLY; @@ -994,8 +985,7 @@ static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) int error, len; __u8 __name[NCP_MAXPATHLEN + 1]; - DPRINTK("ncp_mkdir: making %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_mkdir: making %pd2\n", dentry); ncp_age_dentry(server, dentry); len = sizeof(__name); @@ -1032,8 +1022,7 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry) int error, result, len; __u8 __name[NCP_MAXPATHLEN + 1]; - DPRINTK("ncp_rmdir: removing %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_rmdir: removing %pd2\n", dentry); len = sizeof(__name); error = ncp_io2vol(server, __name, &len, dentry->d_name.name, @@ -1078,8 +1067,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry) int error; server = NCP_SERVER(dir); - DPRINTK("ncp_unlink: unlinking %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_unlink: unlinking %pd2\n", dentry); /* * Check whether to close the file ... @@ -1099,8 +1087,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry) #endif switch (error) { case 0x00: - DPRINTK("ncp: removed %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp: removed %pd2\n", dentry); break; case 0x85: case 0x8A: @@ -1133,9 +1120,7 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, int old_len, new_len; __u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1]; - DPRINTK("ncp_rename: %s/%s to %s/%s\n", - old_dentry->d_parent->d_name.name, old_dentry->d_name.name, - new_dentry->d_parent->d_name.name, new_dentry->d_name.name); + DPRINTK("ncp_rename: %pd2 to %pd2\n", old_dentry, new_dentry); ncp_age_dentry(server, old_dentry); ncp_age_dentry(server, new_dentry); @@ -1165,8 +1150,8 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, #endif switch (error) { case 0x00: - DPRINTK("ncp renamed %s -> %s.\n", - old_dentry->d_name.name,new_dentry->d_name.name); + DPRINTK("ncp renamed %pd -> %pd.\n", + old_dentry, new_dentry); break; case 0x9E: error = -ENAMETOOLONG; diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 122e260247f..8f5074e1ecb 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -107,8 +107,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) void* freepage; size_t freelen; - DPRINTK("ncp_file_read: enter %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_file_read: enter %pd2\n", dentry); pos = *ppos; @@ -166,8 +165,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) file_accessed(file); - DPRINTK("ncp_file_read: exit %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_file_read: exit %pd2\n", dentry); outrel: ncp_inode_close(inode); return already_read ? already_read : error; @@ -184,8 +182,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * int errno; void* bouncebuffer; - DPRINTK("ncp_file_write: enter %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_file_write: enter %pd2\n", dentry); if ((ssize_t) count < 0) return -EINVAL; pos = *ppos; @@ -264,8 +261,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * i_size_write(inode, pos); mutex_unlock(&inode->i_mutex); } - DPRINTK("ncp_file_write: exit %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_file_write: exit %pd2\n", dentry); outrel: ncp_inode_close(inode); return already_written ? already_written : errno; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 4659da67e7f..2cf2ebecb55 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -782,6 +782,17 @@ out: return error; } +static void delayed_free(struct rcu_head *p) +{ + struct ncp_server *server = container_of(p, struct ncp_server, rcu); +#ifdef CONFIG_NCPFS_NLS + /* unload the NLS charsets */ + unload_nls(server->nls_vol); + unload_nls(server->nls_io); +#endif /* CONFIG_NCPFS_NLS */ + kfree(server); +} + static void ncp_put_super(struct super_block *sb) { struct ncp_server *server = NCP_SBP(sb); @@ -792,11 +803,6 @@ static void ncp_put_super(struct super_block *sb) ncp_stop_tasks(server); -#ifdef CONFIG_NCPFS_NLS - /* unload the NLS charsets */ - unload_nls(server->nls_vol); - unload_nls(server->nls_io); -#endif /* CONFIG_NCPFS_NLS */ mutex_destroy(&server->rcv.creq_mutex); mutex_destroy(&server->root_setup_lock); mutex_destroy(&server->mutex); @@ -813,8 +819,7 @@ static void ncp_put_super(struct super_block *sb) vfree(server->rxbuf); vfree(server->txbuf); vfree(server->packet); - sb->s_fs_info = NULL; - kfree(server); + call_rcu(&server->rcu, delayed_free); } static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h index c51b2c54353..b81e97adc5a 100644 --- a/fs/ncpfs/ncp_fs_sb.h +++ b/fs/ncpfs/ncp_fs_sb.h @@ -38,7 +38,7 @@ struct ncp_mount_data_kernel { }; struct ncp_server { - + struct rcu_head rcu; struct ncp_mount_data_kernel m; /* Nearly all of the mount data is of interest for us later, so we store it completely. */ diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9a8676f3335..812154aff98 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -98,9 +98,7 @@ nfs_opendir(struct inode *inode, struct file *filp) struct nfs_open_dir_context *ctx; struct rpc_cred *cred; - dfprintk(FILE, "NFS: open dir(%s/%s)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name); + dfprintk(FILE, "NFS: open dir(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSOPEN); @@ -297,11 +295,10 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des if (ctx->duped > 0 && ctx->dup_cookie == *desc->dir_cookie) { if (printk_ratelimit()) { - pr_notice("NFS: directory %s/%s contains a readdir loop." + pr_notice("NFS: directory %pD2 contains a readdir loop." "Please contact your server vendor. " "The file: %s has duplicate cookie %llu\n", - desc->file->f_dentry->d_parent->d_name.name, - desc->file->f_dentry->d_name.name, + desc->file, array->array[i].string.name, *desc->dir_cookie); } @@ -822,9 +819,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) struct nfs_open_dir_context *dir_ctx = file->private_data; int res = 0; - dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (long long)ctx->pos); + dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", + file, (long long)ctx->pos); nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); /* @@ -880,22 +876,17 @@ out: nfs_unblock_sillyrename(dentry); if (res > 0) res = 0; - dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - res); + dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); return res; } static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); struct nfs_open_dir_context *dir_ctx = filp->private_data; - dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name, - offset, whence); + dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n", + filp, offset, whence); mutex_lock(&inode->i_mutex); switch (whence) { @@ -925,15 +916,12 @@ out: static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end, int datasync) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); - dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - datasync); + dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync); mutex_lock(&inode->i_mutex); - nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); + nfs_inc_stats(inode, NFSIOS_VFSFSYNC); mutex_unlock(&inode->i_mutex); return 0; } @@ -1073,9 +1061,8 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) } if (is_bad_inode(inode)) { - dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name); + dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", + __func__, dentry); goto out_bad; } @@ -1125,9 +1112,8 @@ out_set_verifier: nfs_advise_use_readdirplus(dir); out_valid_noent: dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name); + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", + __func__, dentry); return 1; out_zap_parent: nfs_zap_caches(dir); @@ -1153,18 +1139,16 @@ out_zap_parent: goto out_valid; dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name); + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", + __func__, dentry); return 0; out_error: nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); nfs4_label_free(label); dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name, error); + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n", + __func__, dentry, error); return error; } @@ -1188,16 +1172,14 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) * eventually need to do something more here. */ if (!inode) { - dfprintk(LOOKUPCACHE, "%s: %s/%s has negative inode\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name); + dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n", + __func__, dentry); return 1; } if (is_bad_inode(inode)) { - dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name); + dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", + __func__, dentry); return 0; } @@ -1212,9 +1194,8 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) */ static int nfs_dentry_delete(const struct dentry *dentry) { - dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - dentry->d_flags); + dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n", + dentry, dentry->d_flags); /* Unhash any dentry with a stale inode */ if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode)) @@ -1292,8 +1273,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in struct nfs4_label *label = NULL; int error; - dfprintk(VFS, "NFS: lookup(%s/%s)\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry); nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); res = ERR_PTR(-ENAMETOOLONG); @@ -1424,8 +1404,8 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, /* Expect a negative dentry */ BUG_ON(dentry->d_inode); - dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: atomic_open(%s/%ld), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); err = nfs_check_flags(open_flags); if (err) @@ -1614,8 +1594,8 @@ int nfs_create(struct inode *dir, struct dentry *dentry, int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT; int error; - dfprintk(VFS, "NFS: create(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: create(%s/%ld), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; @@ -1641,8 +1621,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) struct iattr attr; int status; - dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mknod(%s/%ld), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); if (!new_valid_dev(rdev)) return -EINVAL; @@ -1670,8 +1650,8 @@ int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct iattr attr; int error; - dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mkdir(%s/%ld), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); attr.ia_valid = ATTR_MODE; attr.ia_mode = mode | S_IFDIR; @@ -1698,8 +1678,8 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; - dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: rmdir(%s/%ld), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); trace_nfs_rmdir_enter(dir, dentry); if (dentry->d_inode) { @@ -1734,8 +1714,7 @@ static int nfs_safe_remove(struct dentry *dentry) struct inode *inode = dentry->d_inode; int error = -EBUSY; - dfprintk(VFS, "NFS: safe_remove(%s/%s)\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry); /* If the dentry was sillyrenamed, we simply call d_delete() */ if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { @@ -1768,8 +1747,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) int error; int need_rehash = 0; - dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: unlink(%s/%ld, %pd)\n", dir->i_sb->s_id, + dir->i_ino, dentry); trace_nfs_unlink_enter(dir, dentry); spin_lock(&dentry->d_lock); @@ -1819,8 +1798,8 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) unsigned int pathlen = strlen(symname); int error; - dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name, symname); + dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s)\n", dir->i_sb->s_id, + dir->i_ino, dentry, symname); if (pathlen > PAGE_SIZE) return -ENAMETOOLONG; @@ -1842,9 +1821,9 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); trace_nfs_symlink_exit(dir, dentry, error); if (error != 0) { - dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", + dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s) error %d\n", dir->i_sb->s_id, dir->i_ino, - dentry->d_name.name, symname, error); + dentry, symname, error); d_drop(dentry); __free_page(page); return error; @@ -1871,9 +1850,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) struct inode *inode = old_dentry->d_inode; int error; - dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n", - old_dentry->d_parent->d_name.name, old_dentry->d_name.name, - dentry->d_parent->d_name.name, dentry->d_name.name); + dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n", + old_dentry, dentry); trace_nfs_link_enter(inode, dir, dentry); NFS_PROTO(inode)->return_delegation(inode); @@ -1921,9 +1899,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *dentry = NULL, *rehash = NULL; int error = -EBUSY; - dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", - old_dentry->d_parent->d_name.name, old_dentry->d_name.name, - new_dentry->d_parent->d_name.name, new_dentry->d_name.name, + dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n", + old_dentry, new_dentry, d_count(new_dentry)); trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 91ff089d341..d71d66c9e0a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -124,9 +124,8 @@ static inline int put_dreq(struct nfs_direct_req *dreq) ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) { #ifndef CONFIG_NFS_SWAP - dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", - iocb->ki_filp->f_path.dentry->d_name.name, - (long long) pos, nr_segs); + dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", + iocb->ki_filp, (long long) pos, nr_segs); return -EINVAL; #else @@ -909,10 +908,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, count = iov_length(iov, nr_segs); nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); - dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - count, (long long) pos); + dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", + file, count, (long long) pos); retval = 0; if (!count) @@ -965,10 +962,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, count = iov_length(iov, nr_segs); nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); - dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - count, (long long) pos); + dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", + file, count, (long long) pos); retval = generic_write_checks(file, &pos, &count, 0); if (retval) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1e6bfdbc1af..e2fcacf07de 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -65,9 +65,7 @@ nfs_file_open(struct inode *inode, struct file *filp) { int res; - dprintk("NFS: open file(%s/%s)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name); + dprintk("NFS: open file(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSOPEN); res = nfs_check_flags(filp->f_flags); @@ -81,9 +79,7 @@ nfs_file_open(struct inode *inode, struct file *filp) int nfs_file_release(struct inode *inode, struct file *filp) { - dprintk("NFS: release(%s/%s)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name); + dprintk("NFS: release(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return nfs_release(inode, filp); @@ -123,10 +119,8 @@ force_reval: loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence) { - dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name, - offset, whence); + dprintk("NFS: llseek file(%pD2, %lld, %d)\n", + filp, offset, whence); /* * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate @@ -150,12 +144,9 @@ EXPORT_SYMBOL_GPL(nfs_file_llseek); int nfs_file_flush(struct file *file, fl_owner_t id) { - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(file); - dprintk("NFS: flush(%s/%s)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); + dprintk("NFS: flush(%pD2)\n", file); nfs_inc_stats(inode, NFSIOS_VFSFLUSH); if ((file->f_mode & FMODE_WRITE) == 0) @@ -177,15 +168,14 @@ ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct dentry * dentry = iocb->ki_filp->f_path.dentry; - struct inode * inode = dentry->d_inode; + struct inode *inode = file_inode(iocb->ki_filp); ssize_t result; if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); - dprintk("NFS: read(%s/%s, %lu@%lu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, + dprintk("NFS: read(%pD2, %lu@%lu)\n", + iocb->ki_filp, (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); @@ -203,13 +193,11 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, struct pipe_inode_info *pipe, size_t count, unsigned int flags) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); ssize_t res; - dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (unsigned long long) *ppos); + dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n", + filp, (unsigned long) count, (unsigned long long) *ppos); res = nfs_revalidate_mapping(inode, filp->f_mapping); if (!res) { @@ -224,12 +212,10 @@ EXPORT_SYMBOL_GPL(nfs_file_splice_read); int nfs_file_mmap(struct file * file, struct vm_area_struct * vma) { - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(file); int status; - dprintk("NFS: mmap(%s/%s)\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dprintk("NFS: mmap(%pD2)\n", file); /* Note: generic_file_mmap() returns ENOSYS on nommu systems * so we call that before revalidating the mapping @@ -258,15 +244,12 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap); int nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) { - struct dentry *dentry = file->f_path.dentry; struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(file); int have_error, do_resend, status; int ret = 0; - dprintk("NFS: fsync file(%s/%s) datasync %d\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - datasync); + dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); @@ -371,10 +354,8 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, struct page *page; int once_thru = 0; - dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - mapping->host->i_ino, len, (long long) pos); + dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%ld), %u@%lld)\n", + file, mapping->host->i_ino, len, (long long) pos); start: /* @@ -414,10 +395,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, struct nfs_open_context *ctx = nfs_file_open_context(file); int status; - dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - mapping->host->i_ino, len, (long long) pos); + dfprintk(PAGECACHE, "NFS: write_end(%pD2(%ld), %u@%lld)\n", + file, mapping->host->i_ino, len, (long long) pos); /* * Zero any uninitialised parts of the page, and then mark the page @@ -601,22 +580,21 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; struct file *filp = vma->vm_file; - struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = file_inode(filp); unsigned pagelen; int ret = VM_FAULT_NOPAGE; struct address_space *mapping; - dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - filp->f_mapping->host->i_ino, + dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%ld), offset %lld)\n", + filp, filp->f_mapping->host->i_ino, (long long)page_offset(page)); /* make sure the cache has finished storing the page */ - nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page); + nfs_fscache_wait_on_page_write(NFS_I(inode), page); lock_page(page); mapping = page_file_mapping(page); - if (mapping != dentry->d_inode->i_mapping) + if (mapping != inode->i_mapping) goto out_unlock; wait_on_page_writeback(page); @@ -659,22 +637,21 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct dentry * dentry = iocb->ki_filp->f_path.dentry; - struct inode * inode = dentry->d_inode; + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); unsigned long written = 0; ssize_t result; size_t count = iov_length(iov, nr_segs); - result = nfs_key_timeout_notify(iocb->ki_filp, inode); + result = nfs_key_timeout_notify(file, inode); if (result) return result; - if (iocb->ki_filp->f_flags & O_DIRECT) + if (file->f_flags & O_DIRECT) return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); - dprintk("NFS: write(%s/%s, %lu@%Ld)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (long long) pos); + dprintk("NFS: write(%pD2, %lu@%Ld)\n", + file, (unsigned long) count, (long long) pos); result = -EBUSY; if (IS_SWAPFILE(inode)) @@ -682,8 +659,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, /* * O_APPEND implies that we must revalidate the file length. */ - if (iocb->ki_filp->f_flags & O_APPEND) { - result = nfs_revalidate_file_size(inode, iocb->ki_filp); + if (file->f_flags & O_APPEND) { + result = nfs_revalidate_file_size(inode, file); if (result) goto out; } @@ -697,8 +674,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, written = result; /* Return error values for O_DSYNC and IS_SYNC() */ - if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { - int err = vfs_fsync(iocb->ki_filp, 0); + if (result >= 0 && nfs_need_sync_write(file, inode)) { + int err = vfs_fsync(file, 0); if (err < 0) result = err; } @@ -717,14 +694,12 @@ ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, struct file *filp, loff_t *ppos, size_t count, unsigned int flags) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); unsigned long written = 0; ssize_t ret; - dprintk("NFS splice_write(%s/%s, %lu@%llu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (unsigned long long) *ppos); + dprintk("NFS splice_write(%pD2, %lu@%llu)\n", + filp, (unsigned long) count, (unsigned long long) *ppos); /* * The combination of splice and an O_APPEND destination is disallowed. @@ -883,10 +858,8 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) int ret = -ENOLCK; int is_local = 0; - dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name, - fl->fl_type, fl->fl_flags, + dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n", + filp, fl->fl_type, fl->fl_flags, (long long)fl->fl_start, (long long)fl->fl_end); nfs_inc_stats(inode, NFSIOS_VFSLOCK); @@ -923,10 +896,8 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) struct inode *inode = filp->f_mapping->host; int is_local = 0; - dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name, - fl->fl_type, fl->fl_flags); + dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n", + filp, fl->fl_type, fl->fl_flags); if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; @@ -960,9 +931,7 @@ EXPORT_SYMBOL_GPL(nfs_flock); */ int nfs_setlease(struct file *file, long arg, struct file_lock **fl) { - dprintk("NFS: setlease(%s/%s, arg=%ld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, arg); + dprintk("NFS: setlease(%pD2, arg=%ld)\n", file, arg); return -EINVAL; } EXPORT_SYMBOL_GPL(nfs_setlease); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 348b535cd78..b5a0afc3ee1 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -253,9 +253,8 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, dprintk("--> nfs_do_submount()\n"); - dprintk("%s: submounting on %s/%s\n", __func__, - dentry->d_parent->d_name.name, - dentry->d_name.name); + dprintk("%s: submounting on %pd2\n", __func__, + dentry); if (page == NULL) goto out; devname = nfs_devname(dentry, page, PAGE_SIZE); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 90cb10d7b69..01b6f6a49d1 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -321,7 +321,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, umode_t mode = sattr->ia_mode; int status = -ENOMEM; - dprintk("NFS call create %s\n", dentry->d_name.name); + dprintk("NFS call create %pd\n", dentry); data = nfs3_alloc_createdata(); if (data == NULL) @@ -548,7 +548,7 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, if (len > NFS3_MAXPATHLEN) return -ENAMETOOLONG; - dprintk("NFS call symlink %s\n", dentry->d_name.name); + dprintk("NFS call symlink %pd\n", dentry); data = nfs3_alloc_createdata(); if (data == NULL) @@ -576,7 +576,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) umode_t mode = sattr->ia_mode; int status = -ENOMEM; - dprintk("NFS call mkdir %s\n", dentry->d_name.name); + dprintk("NFS call mkdir %pd\n", dentry); sattr->ia_mode &= ~current_umask(); @@ -695,7 +695,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, umode_t mode = sattr->ia_mode; int status = -ENOMEM; - dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, + dprintk("NFS call mknod %pd %u:%u\n", dentry, MAJOR(rdev), MINOR(rdev)); sattr->ia_mode &= ~current_umask(); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 1f01b55692e..8de3407e036 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -31,9 +31,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) * -EOPENSTALE. The VFS will retry the lookup/create/open. */ - dprintk("NFS: open file(%s/%s)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); + dprintk("NFS: open file(%pd2)\n", dentry); if ((openflags & O_ACCMODE) == 3) openflags--; diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index c08cbf40c59..4e7f05d3e9d 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -292,8 +292,7 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry, if (locations == NULL || locations->nlocations <= 0) goto out; - dprintk("%s: referral at %s/%s\n", __func__, - dentry->d_parent->d_name.name, dentry->d_name.name); + dprintk("%s: referral at %pd2\n", __func__, dentry); page = (char *) __get_free_page(GFP_USER); if (!page) @@ -357,8 +356,8 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry * mnt = ERR_PTR(-ENOENT); parent = dget_parent(dentry); - dprintk("%s: getting locations for %s/%s\n", - __func__, parent->d_name.name, dentry->d_name.name); + dprintk("%s: getting locations for %pd2\n", + __func__, dentry); err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page); dput(parent); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5ab33c0792d..659990c0109 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3771,9 +3771,8 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, }; int status; - dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __func__, - dentry->d_parent->d_name.name, - dentry->d_name.name, + dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__, + dentry, (unsigned long long)cookie); nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index a8f57c728df..fddbba2d9ef 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -235,7 +235,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, }; int status = -ENOMEM; - dprintk("NFS call create %s\n", dentry->d_name.name); + dprintk("NFS call create %pd\n", dentry); data = nfs_alloc_createdata(dir, dentry, sattr); if (data == NULL) goto out; @@ -265,7 +265,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, umode_t mode; int status = -ENOMEM; - dprintk("NFS call mknod %s\n", dentry->d_name.name); + dprintk("NFS call mknod %pd\n", dentry); mode = sattr->ia_mode; if (S_ISFIFO(mode)) { @@ -423,7 +423,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, }; int status = -ENAMETOOLONG; - dprintk("NFS call symlink %s\n", dentry->d_name.name); + dprintk("NFS call symlink %pd\n", dentry); if (len > NFS2_MAXPATHLEN) goto out; @@ -462,7 +462,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) }; int status = -ENOMEM; - dprintk("NFS call mkdir %s\n", dentry->d_name.name); + dprintk("NFS call mkdir %pd\n", dentry); data = nfs_alloc_createdata(dir, dentry, sattr); if (data == NULL) goto out; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 0c29b1bb393..11d78944de7 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -495,9 +495,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) struct rpc_task *task; int error = -EBUSY; - dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - d_count(dentry)); + dfprintk(VFS, "NFS: silly-rename(%pd2, ct=%d)\n", + dentry, d_count(dentry)); nfs_inc_stats(dir, NFSIOS_SILLYRENAME); /* @@ -521,8 +520,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) SILLYNAME_FILEID_LEN, fileid, SILLYNAME_COUNTER_LEN, sillycounter); - dfprintk(VFS, "NFS: trying to rename %s to %s\n", - dentry->d_name.name, silly); + dfprintk(VFS, "NFS: trying to rename %pd to %s\n", + dentry, silly); sdentry = lookup_one_len(silly, dentry->d_parent, slen); /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ac1dc331ba3..c1d548211c3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -954,10 +954,8 @@ int nfs_updatepage(struct file *file, struct page *page, nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); - dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, count, - (long long)(page_file_offset(page) + offset)); + dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n", + file, count, (long long)(page_file_offset(page) + offset)); if (nfs_can_extend_write(file, page, inode)) { count = max(count + offset, nfs_page_length(page)); diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index e0a65a9e37e..9c271f42604 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -385,8 +385,8 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) status = vfs_rmdir(parent->d_inode, child); if (status) - printk("failed to remove client recovery directory %s\n", - child->d_name.name); + printk("failed to remove client recovery directory %pd\n", + child); /* Keep trying, success or failure: */ return 0; } @@ -410,15 +410,15 @@ out: nfs4_release_reclaim(nn); if (status) printk("nfsd4: failed to purge old clients from recovery" - " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); + " directory %pD\n", nn->rec_file); } static int load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { if (child->d_name.len != HEXDIR_LEN - 1) { - printk("nfsd4: illegal name %s in recovery directory\n", - child->d_name.name); + printk("nfsd4: illegal name %pd in recovery directory\n", + child); /* Keep trying; maybe the others are OK: */ return 0; } @@ -437,7 +437,7 @@ nfsd4_recdir_load(struct net *net) { status = nfsd4_list_rec_dir(load_recdir, nn); if (status) printk("nfsd4: failed loading clients from recovery" - " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); + " directory %pD\n", nn->rec_file); return status; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0874998a49c..f36a30a9f2d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3008,7 +3008,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f return NULL; locks_init_lock(fl); fl->fl_lmops = &nfsd_lease_mng_ops; - fl->fl_flags = FL_LEASE; + fl->fl_flags = FL_DELEG; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl->fl_end = OFFSET_MAX; fl->fl_owner = (fl_owner_t)(dp->dl_file); @@ -3843,9 +3843,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *stp; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_open_confirm on file %pd\n", + cstate->current_fh.fh_dentry); status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); if (status) @@ -3922,9 +3921,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct nfs4_ol_stateid *stp; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_open_downgrade on file %pd\n", + cstate->current_fh.fh_dentry); /* We don't yet support WANT bits: */ if (od->od_deleg_want) @@ -3980,9 +3978,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); - dprintk("NFSD: nfsd4_close on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_close on file %pd\n", + cstate->current_fh.fh_dentry); nfs4_lock_state(); status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 814afaa4458..3d0e15ae6f7 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -47,7 +47,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) tdentry = parent; } if (tdentry != exp->ex_path.dentry) - dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name); + dprintk("nfsd_acceptable failed at %p %pd\n", tdentry, tdentry); rv = (tdentry == exp->ex_path.dentry); dput(tdentry); return rv; @@ -253,8 +253,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (S_ISDIR(dentry->d_inode->i_mode) && (dentry->d_flags & DCACHE_DISCONNECTED)) { - printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %pd2\n", + dentry); } fhp->fh_dentry = dentry; @@ -361,10 +361,9 @@ skip_pseudoflavor_check: error = nfsd_permission(rqstp, exp, dentry, access); if (error) { - dprintk("fh_verify: %s/%s permission failure, " + dprintk("fh_verify: %pd2 permission failure, " "acc=%x, error=%d\n", - dentry->d_parent->d_name.name, - dentry->d_name.name, + dentry, access, ntohl(error)); } out: @@ -514,14 +513,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, */ struct inode * inode = dentry->d_inode; - struct dentry *parent = dentry->d_parent; __u32 *datap; dev_t ex_dev = exp_sb(exp)->s_dev; - dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", + dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n", MAJOR(ex_dev), MINOR(ex_dev), (long) exp->ex_path.dentry->d_inode->i_ino, - parent->d_name.name, dentry->d_name.name, + dentry, (inode ? inode->i_ino : 0)); /* Choose filehandle version and fsid type based on @@ -534,13 +532,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, fh_put(ref_fh); if (fhp->fh_locked || fhp->fh_dentry) { - printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n", - parent->d_name.name, dentry->d_name.name); + printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n", + dentry); } if (fhp->fh_maxsize < NFS_FHSIZE) - printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n", + printk(KERN_ERR "fh_compose: called with maxsize %d! %pd2\n", fhp->fh_maxsize, - parent->d_name.name, dentry->d_name.name); + dentry); fhp->fh_dentry = dget(dentry); /* our internal copy */ fhp->fh_export = exp; @@ -613,8 +611,8 @@ out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); goto out; out_negative: - printk(KERN_ERR "fh_update: %s/%s still negative!\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + printk(KERN_ERR "fh_update: %pd2 still negative!\n", + dentry); goto out; } diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index e5e6707ba68..4775bc4896c 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -173,8 +173,8 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) BUG_ON(!dentry); if (fhp->fh_locked) { - printk(KERN_WARNING "fh_lock: %s/%s already locked!\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + printk(KERN_WARNING "fh_lock: %pd2 already locked!\n", + dentry); return; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c827acb0e94..94b5f5d2bfe 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -427,7 +427,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, goto out_nfserr; fh_lock(fhp); - host_err = notify_change(dentry, iap); + host_err = notify_change(dentry, iap, NULL); err = nfserrno(host_err); fh_unlock(fhp); } @@ -988,7 +988,11 @@ static void kill_suid(struct dentry *dentry) ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; mutex_lock(&dentry->d_inode->i_mutex); - notify_change(dentry, &ia); + /* + * Note we call this on write, so notify_change will not + * encounter any conflicting delegations: + */ + notify_change(dentry, &ia, NULL); mutex_unlock(&dentry->d_inode->i_mutex); } @@ -1317,9 +1321,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!fhp->fh_locked) { /* not actually possible */ printk(KERN_ERR - "nfsd_create: parent %s/%s not locked!\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); + "nfsd_create: parent %pd2 not locked!\n", + dentry); err = nfserr_io; goto out; } @@ -1329,8 +1332,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ err = nfserr_exist; if (dchild->d_inode) { - dprintk("nfsd_create: dentry %s/%s not negative!\n", - dentry->d_name.name, dchild->d_name.name); + dprintk("nfsd_create: dentry %pd/%pd not negative!\n", + dentry, dchild); goto out; } @@ -1737,7 +1740,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, err = nfserrno(host_err); goto out_dput; } - host_err = vfs_link(dold, dirp, dnew); + host_err = vfs_link(dold, dirp, dnew, NULL); if (!host_err) { err = nfserrno(commit_metadata(ffhp)); if (!err) @@ -1838,7 +1841,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (host_err) goto out_dput_new; } - host_err = vfs_rename(fdir, odentry, tdir, ndentry); + host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); if (!host_err) { host_err = commit_metadata(tfhp); if (!host_err) @@ -1911,7 +1914,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (host_err) goto out_put; if (type != S_IFDIR) - host_err = vfs_unlink(dirp, rdentry); + host_err = vfs_unlink(dirp, rdentry, NULL); else host_err = vfs_rmdir(dirp, rdentry); if (!host_err) diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 2778b0255dc..ffb9b367573 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -55,7 +55,7 @@ * * Return 1 if the attributes match and 0 if not. * - * NOTE: This function runs with the inode->i_lock spin lock held so it is not + * NOTE: This function runs with the inode_hash_lock spin lock held so it is not * allowed to sleep. */ int ntfs_test_inode(struct inode *vi, ntfs_attr *na) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index f87f9bd1edf..f29a90fde61 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -386,19 +386,9 @@ static int ocfs2_read_locked_inode(struct inode *inode, u32 generation = 0; status = -EINVAL; - if (inode == NULL || inode->i_sb == NULL) { - mlog(ML_ERROR, "bad inode\n"); - return status; - } sb = inode->i_sb; osb = OCFS2_SB(sb); - if (!args) { - mlog(ML_ERROR, "bad inode args\n"); - make_bad_inode(inode); - return status; - } - /* * To improve performance of cold-cache inode stats, we take * the cluster lock here if possible. diff --git a/fs/open.c b/fs/open.c index d420331ca32..4b3e1edf2fe 100644 --- a/fs/open.c +++ b/fs/open.c @@ -57,7 +57,8 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, newattrs.ia_valid |= ret | ATTR_FORCE; mutex_lock(&dentry->d_inode->i_mutex); - ret = notify_change(dentry, &newattrs); + /* Note any delegations or leases have already been broken: */ + ret = notify_change(dentry, &newattrs, NULL); mutex_unlock(&dentry->d_inode->i_mutex); return ret; } @@ -464,21 +465,28 @@ out: static int chmod_common(struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; + struct inode *delegated_inode = NULL; struct iattr newattrs; int error; error = mnt_want_write(path->mnt); if (error) return error; +retry_deleg: mutex_lock(&inode->i_mutex); error = security_path_chmod(path, mode); if (error) goto out_unlock; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change(path->dentry, &newattrs); + error = notify_change(path->dentry, &newattrs, &delegated_inode); out_unlock: mutex_unlock(&inode->i_mutex); + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } mnt_drop_write(path->mnt); return error; } @@ -522,6 +530,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) static int chown_common(struct path *path, uid_t user, gid_t group) { struct inode *inode = path->dentry->d_inode; + struct inode *delegated_inode = NULL; int error; struct iattr newattrs; kuid_t uid; @@ -546,12 +555,17 @@ static int chown_common(struct path *path, uid_t user, gid_t group) if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; +retry_deleg: mutex_lock(&inode->i_mutex); error = security_path_chown(path, uid, gid); if (!error) - error = notify_change(path->dentry, &newattrs); + error = notify_change(path->dentry, &newattrs, &delegated_inode); mutex_unlock(&inode->i_mutex); - + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } return error; } @@ -685,7 +699,6 @@ static int do_dentry_open(struct file *f, } f->f_mapping = inode->i_mapping; - file_sb_list_add(f, inode->i_sb); if (unlikely(f->f_mode & FMODE_PATH)) { f->f_op = &empty_fops; @@ -693,6 +706,10 @@ static int do_dentry_open(struct file *f, } f->f_op = fops_get(inode->i_fop); + if (unlikely(WARN_ON(!f->f_op))) { + error = -ENODEV; + goto cleanup_all; + } error = security_file_open(f, cred); if (error) @@ -702,7 +719,7 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; - if (!open && f->f_op) + if (!open) open = f->f_op->open; if (open) { error = open(inode, f); @@ -720,7 +737,6 @@ static int do_dentry_open(struct file *f, cleanup_all: fops_put(f->f_op); - file_sb_list_del(f); if (f->f_mode & FMODE_WRITE) { put_write_access(inode); if (!special_file(inode->i_mode)) { @@ -1023,7 +1039,7 @@ int filp_close(struct file *filp, fl_owner_t id) return 0; } - if (filp->f_op && filp->f_op->flush) + if (filp->f_op->flush) retval = filp->f_op->flush(filp, id); if (likely(!(filp->f_mode & FMODE_PATH))) { diff --git a/fs/pnode.c b/fs/pnode.c index 9af0df15256..c7221bb1980 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -264,12 +264,12 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, prev_src_mnt = child; } out: - br_write_lock(&vfsmount_lock); + lock_mount_hash(); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct mount, mnt_hash); umount_tree(child, 0); } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); return ret; } @@ -278,8 +278,7 @@ out: */ static inline int do_refcount_check(struct mount *mnt, int count) { - int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts; - return (mycount > count); + return mnt_get_count(mnt) > count; } /* @@ -311,7 +310,7 @@ int propagate_mount_busy(struct mount *mnt, int refcnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint, 0); + child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); if (child && list_empty(&child->mnt_mounts) && (ret = do_refcount_check(child, 1))) break; @@ -333,8 +332,8 @@ static void __propagate_umount(struct mount *mnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - struct mount *child = __lookup_mnt(&m->mnt, - mnt->mnt_mountpoint, 0); + struct mount *child = __lookup_mnt_last(&m->mnt, + mnt->mnt_mountpoint); /* * umount the child only if the child has no * other children diff --git a/fs/proc/self.c b/fs/proc/self.c index 6b6a993b5c2..ffeb202ec94 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -36,18 +36,10 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } -static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - kfree(s); -} - static const struct inode_operations proc_self_inode_operations = { .readlink = proc_self_readlink, .follow_link = proc_self_follow_link, - .put_link = proc_self_put_link, + .put_link = kfree_put_link, }; static unsigned self_inum; diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 5fe34c355e8..439406e081a 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -20,15 +20,15 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) struct proc_mounts *p = proc_mounts(file->private_data); struct mnt_namespace *ns = p->ns; unsigned res = POLLIN | POLLRDNORM; + int event; poll_wait(file, &p->ns->poll, wait); - br_read_lock(&vfsmount_lock); - if (p->m.poll_event != ns->event) { - p->m.poll_event = ns->event; + event = ACCESS_ONCE(ns->event); + if (p->m.poll_event != event) { + p->m.poll_event = event; res |= POLLERR | POLLPRI; } - br_read_unlock(&vfsmount_lock); return res; } diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index d024505ba00..e62c8183777 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -60,10 +60,6 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir, struct buffer_head *bh; *res_dir = NULL; - if (!dir->i_sb) { - printk(KERN_WARNING "qnx4: no superblock on dir.\n"); - return NULL; - } bh = NULL; block = offset = blkofs = 0; while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) { diff --git a/fs/read_write.c b/fs/read_write.c index e3cd280b158..58e440df1bc 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -257,7 +257,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) fn = no_llseek; if (file->f_mode & FMODE_LSEEK) { - if (file->f_op && file->f_op->llseek) + if (file->f_op->llseek) fn = file->f_op->llseek; } return fn(file, offset, whence); @@ -384,7 +384,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) + if (!file->f_op->read && !file->f_op->aio_read) return -EINVAL; if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) return -EFAULT; @@ -433,7 +433,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t const char __user *p; ssize_t ret; - if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) + if (!file->f_op->write && !file->f_op->aio_write) return -EINVAL; old_fs = get_fs(); @@ -460,7 +460,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) + if (!file->f_op->write && !file->f_op->aio_write) return -EINVAL; if (unlikely(!access_ok(VERIFY_READ, buf, count))) return -EFAULT; @@ -727,11 +727,6 @@ static ssize_t do_readv_writev(int type, struct file *file, io_fn_t fn; iov_fn_t fnv; - if (!file->f_op) { - ret = -EINVAL; - goto out; - } - ret = rw_copy_check_uvector(type, uvector, nr_segs, ARRAY_SIZE(iovstack), iovstack, &iov); if (ret <= 0) @@ -778,7 +773,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) + if (!file->f_op->aio_read && !file->f_op->read) return -EINVAL; return do_readv_writev(READ, file, vec, vlen, pos); @@ -791,7 +786,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) + if (!file->f_op->aio_write && !file->f_op->write) return -EINVAL; return do_readv_writev(WRITE, file, vec, vlen, pos); @@ -906,10 +901,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, io_fn_t fn; iov_fn_t fnv; - ret = -EINVAL; - if (!file->f_op) - goto out; - ret = -EFAULT; if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) goto out; @@ -965,7 +956,7 @@ static size_t compat_readv(struct file *file, goto out; ret = -EINVAL; - if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) + if (!file->f_op->aio_read && !file->f_op->read) goto out; ret = compat_do_readv_writev(READ, file, vec, vlen, pos); @@ -1032,7 +1023,7 @@ static size_t compat_writev(struct file *file, goto out; ret = -EINVAL; - if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) + if (!file->f_op->aio_write && !file->f_op->write) goto out; ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); diff --git a/fs/readdir.c b/fs/readdir.c index 93d71e57431..5b53d995cae 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -24,7 +24,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); int res = -ENOTDIR; - if (!file->f_op || !file->f_op->iterate) + if (!file->f_op->iterate) goto out; res = security_file_permission(file, MAY_READ); diff --git a/fs/select.c b/fs/select.c index dfd5cb18c01..467bb1cb3ea 100644 --- a/fs/select.c +++ b/fs/select.c @@ -454,7 +454,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) const struct file_operations *f_op; f_op = f.file->f_op; mask = DEFAULT_POLLMASK; - if (f_op && f_op->poll) { + if (f_op->poll) { wait_key_set(wait, in, out, bit, busy_flag); mask = (*f_op->poll)(f.file, wait); @@ -761,7 +761,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait, mask = POLLNVAL; if (f.file) { mask = DEFAULT_POLLMASK; - if (f.file->f_op && f.file->f_op->poll) { + if (f.file->f_op->poll) { pwait->_key = pollfd->events|POLLERR|POLLHUP; pwait->_key |= busy_flag; mask = f.file->f_op->poll(f.file, pwait); diff --git a/fs/splice.c b/fs/splice.c index 3b7ee656f3a..46a08f772d7 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -695,7 +695,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, loff_t pos = sd->pos; int more; - if (!likely(file->f_op && file->f_op->sendpage)) + if (!likely(file->f_op->sendpage)) return -EINVAL; more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; @@ -1099,7 +1099,7 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); - if (out->f_op && out->f_op->splice_write) + if (out->f_op->splice_write) splice_write = out->f_op->splice_write; else splice_write = default_file_splice_write; @@ -1125,7 +1125,7 @@ static long do_splice_to(struct file *in, loff_t *ppos, if (unlikely(ret < 0)) return ret; - if (in->f_op && in->f_op->splice_read) + if (in->f_op->splice_read) splice_read = in->f_op->splice_read; else splice_read = default_file_splice_read; diff --git a/fs/stat.c b/fs/stat.c index d0ea7ef75e2..ae0c3cef992 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -37,14 +37,21 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) EXPORT_SYMBOL(generic_fillattr); -int vfs_getattr(struct path *path, struct kstat *stat) +/** + * vfs_getattr_nosec - getattr without security checks + * @path: file to get attributes from + * @stat: structure to return attributes in + * + * Get attributes without calling security_inode_getattr. + * + * Currently the only caller other than vfs_getattr is internal to the + * filehandle lookup code, which uses only the inode number and returns + * no attributes to any user. Any other code probably wants + * vfs_getattr. + */ +int vfs_getattr_nosec(struct path *path, struct kstat *stat) { struct inode *inode = path->dentry->d_inode; - int retval; - - retval = security_inode_getattr(path->mnt, path->dentry); - if (retval) - return retval; if (inode->i_op->getattr) return inode->i_op->getattr(path->mnt, path->dentry, stat); @@ -53,6 +60,18 @@ int vfs_getattr(struct path *path, struct kstat *stat) return 0; } +EXPORT_SYMBOL(vfs_getattr_nosec); + +int vfs_getattr(struct path *path, struct kstat *stat) +{ + int retval; + + retval = security_inode_getattr(path->mnt, path->dentry); + if (retval) + return retval; + return vfs_getattr_nosec(path, stat); +} + EXPORT_SYMBOL(vfs_getattr); int vfs_fstat(unsigned int fd, struct kstat *stat) diff --git a/fs/super.c b/fs/super.c index 0225c20f877..e5f6c2cfac3 100644 --- a/fs/super.c +++ b/fs/super.c @@ -129,33 +129,24 @@ static unsigned long super_cache_count(struct shrinker *shrink, return total_objects; } -static int init_sb_writers(struct super_block *s, struct file_system_type *type) -{ - int err; - int i; - - for (i = 0; i < SB_FREEZE_LEVELS; i++) { - err = percpu_counter_init(&s->s_writers.counter[i], 0); - if (err < 0) - goto err_out; - lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], - &type->s_writers_key[i], 0); - } - init_waitqueue_head(&s->s_writers.wait); - init_waitqueue_head(&s->s_writers.wait_unfrozen); - return 0; -err_out: - while (--i >= 0) - percpu_counter_destroy(&s->s_writers.counter[i]); - return err; -} - -static void destroy_sb_writers(struct super_block *s) +/** + * destroy_super - frees a superblock + * @s: superblock to free + * + * Frees a superblock. + */ +static void destroy_super(struct super_block *s) { int i; - + list_lru_destroy(&s->s_dentry_lru); + list_lru_destroy(&s->s_inode_lru); for (i = 0; i < SB_FREEZE_LEVELS; i++) percpu_counter_destroy(&s->s_writers.counter[i]); + security_sb_free(s); + WARN_ON(!list_empty(&s->s_mounts)); + kfree(s->s_subtype); + kfree(s->s_options); + kfree_rcu(s, rcu); } /** @@ -170,111 +161,74 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static const struct super_operations default_op; + int i; - if (s) { - if (security_sb_alloc(s)) - goto out_free_sb; + if (!s) + return NULL; -#ifdef CONFIG_SMP - s->s_files = alloc_percpu(struct list_head); - if (!s->s_files) - goto err_out; - else { - int i; + if (security_sb_alloc(s)) + goto fail; - for_each_possible_cpu(i) - INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); - } -#else - INIT_LIST_HEAD(&s->s_files); -#endif - if (init_sb_writers(s, type)) - goto err_out; - s->s_flags = flags; - s->s_bdi = &default_backing_dev_info; - INIT_HLIST_NODE(&s->s_instances); - INIT_HLIST_BL_HEAD(&s->s_anon); - INIT_LIST_HEAD(&s->s_inodes); - - if (list_lru_init(&s->s_dentry_lru)) - goto err_out; - if (list_lru_init(&s->s_inode_lru)) - goto err_out_dentry_lru; - - INIT_LIST_HEAD(&s->s_mounts); - init_rwsem(&s->s_umount); - lockdep_set_class(&s->s_umount, &type->s_umount_key); - /* - * sget() can have s_umount recursion. - * - * When it cannot find a suitable sb, it allocates a new - * one (this one), and tries again to find a suitable old - * one. - * - * In case that succeeds, it will acquire the s_umount - * lock of the old one. Since these are clearly distrinct - * locks, and this object isn't exposed yet, there's no - * risk of deadlocks. - * - * Annotate this by putting this lock in a different - * subclass. - */ - down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); - s->s_count = 1; - atomic_set(&s->s_active, 1); - mutex_init(&s->s_vfs_rename_mutex); - lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); - mutex_init(&s->s_dquot.dqio_mutex); - mutex_init(&s->s_dquot.dqonoff_mutex); - init_rwsem(&s->s_dquot.dqptr_sem); - s->s_maxbytes = MAX_NON_LFS; - s->s_op = &default_op; - s->s_time_gran = 1000000000; - s->cleancache_poolid = -1; - - s->s_shrink.seeks = DEFAULT_SEEKS; - s->s_shrink.scan_objects = super_cache_scan; - s->s_shrink.count_objects = super_cache_count; - s->s_shrink.batch = 1024; - s->s_shrink.flags = SHRINKER_NUMA_AWARE; + for (i = 0; i < SB_FREEZE_LEVELS; i++) { + if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0) + goto fail; + lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], + &type->s_writers_key[i], 0); } -out: + init_waitqueue_head(&s->s_writers.wait); + init_waitqueue_head(&s->s_writers.wait_unfrozen); + s->s_flags = flags; + s->s_bdi = &default_backing_dev_info; + INIT_HLIST_NODE(&s->s_instances); + INIT_HLIST_BL_HEAD(&s->s_anon); + INIT_LIST_HEAD(&s->s_inodes); + + if (list_lru_init(&s->s_dentry_lru)) + goto fail; + if (list_lru_init(&s->s_inode_lru)) + goto fail; + + INIT_LIST_HEAD(&s->s_mounts); + init_rwsem(&s->s_umount); + lockdep_set_class(&s->s_umount, &type->s_umount_key); + /* + * sget() can have s_umount recursion. + * + * When it cannot find a suitable sb, it allocates a new + * one (this one), and tries again to find a suitable old + * one. + * + * In case that succeeds, it will acquire the s_umount + * lock of the old one. Since these are clearly distrinct + * locks, and this object isn't exposed yet, there's no + * risk of deadlocks. + * + * Annotate this by putting this lock in a different + * subclass. + */ + down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); + s->s_count = 1; + atomic_set(&s->s_active, 1); + mutex_init(&s->s_vfs_rename_mutex); + lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); + mutex_init(&s->s_dquot.dqio_mutex); + mutex_init(&s->s_dquot.dqonoff_mutex); + init_rwsem(&s->s_dquot.dqptr_sem); + s->s_maxbytes = MAX_NON_LFS; + s->s_op = &default_op; + s->s_time_gran = 1000000000; + s->cleancache_poolid = -1; + + s->s_shrink.seeks = DEFAULT_SEEKS; + s->s_shrink.scan_objects = super_cache_scan; + s->s_shrink.count_objects = super_cache_count; + s->s_shrink.batch = 1024; + s->s_shrink.flags = SHRINKER_NUMA_AWARE; return s; -err_out_dentry_lru: - list_lru_destroy(&s->s_dentry_lru); -err_out: - security_sb_free(s); -#ifdef CONFIG_SMP - if (s->s_files) - free_percpu(s->s_files); -#endif - destroy_sb_writers(s); -out_free_sb: - kfree(s); - s = NULL; - goto out; -} - -/** - * destroy_super - frees a superblock - * @s: superblock to free - * - * Frees a superblock. - */ -static inline void destroy_super(struct super_block *s) -{ - list_lru_destroy(&s->s_dentry_lru); - list_lru_destroy(&s->s_inode_lru); -#ifdef CONFIG_SMP - free_percpu(s->s_files); -#endif - destroy_sb_writers(s); - security_sb_free(s); - WARN_ON(!list_empty(&s->s_mounts)); - kfree(s->s_subtype); - kfree(s->s_options); - kfree(s); +fail: + destroy_super(s); + return NULL; } /* Superblock refcounting */ @@ -756,7 +710,8 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) make sure there are no rw files opened */ if (remount_ro) { if (force) { - mark_files_ro(sb); + sb->s_readonly_remount = 1; + smp_wmb(); } else { retval = sb_prepare_remount_readonly(sb); if (retval) diff --git a/fs/sync.c b/fs/sync.c index 905f3f6b3d8..6c0ca3b7575 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -177,7 +177,7 @@ SYSCALL_DEFINE1(syncfs, int, fd) */ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) { - if (!file->f_op || !file->f_op->fsync) + if (!file->f_op->fsync) return -EINVAL; return file->f_op->fsync(file, start, end, datasync); } diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 6b4947f75af..ea41649e4ca 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -192,8 +192,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, struct ubifs_dent_node *dent; struct ubifs_info *c = dir->i_sb->s_fs_info; - dbg_gen("'%.*s' in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, dir->i_ino); + dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); if (dentry->d_name.len > UBIFS_MAX_NLEN) return ERR_PTR(-ENAMETOOLONG); @@ -225,8 +224,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, * checking. */ err = PTR_ERR(inode); - ubifs_err("dead directory entry '%.*s', error %d", - dentry->d_name.len, dentry->d_name.name, err); + ubifs_err("dead directory entry '%pd', error %d", + dentry, err); ubifs_ro_mode(c, err); goto out; } @@ -260,8 +259,8 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, * parent directory inode. */ - dbg_gen("dent '%.*s', mode %#hx in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); + dbg_gen("dent '%pd', mode %#hx in dir ino %lu", + dentry, mode, dir->i_ino); err = ubifs_budget_space(c, &req); if (err) @@ -509,8 +508,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, * changing the parent inode. */ - dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, inode->i_ino, + dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu", + dentry, inode->i_ino, inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); @@ -566,8 +565,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) * deletions. */ - dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, inode->i_ino, + dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu", + dentry, inode->i_ino, inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); @@ -656,8 +655,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) * because we have extra space reserved for deletions. */ - dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len, - dentry->d_name.name, inode->i_ino, dir->i_ino); + dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry, + inode->i_ino, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); err = check_dir_empty(c, dentry->d_inode); @@ -716,8 +715,8 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) * directory inode. */ - dbg_gen("dent '%.*s', mode %#hx in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); + dbg_gen("dent '%pd', mode %#hx in dir ino %lu", + dentry, mode, dir->i_ino); err = ubifs_budget_space(c, &req); if (err) @@ -778,8 +777,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, * directory inode. */ - dbg_gen("dent '%.*s' in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, dir->i_ino); + dbg_gen("dent '%pd' in dir ino %lu", dentry, dir->i_ino); if (!new_valid_dev(rdev)) return -EINVAL; @@ -853,8 +851,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, * directory inode. */ - dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len, - dentry->d_name.name, symname, dir->i_ino); + dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry, + symname, dir->i_ino); if (len > UBIFS_MAX_INO_DATA) return -ENAMETOOLONG; @@ -979,10 +977,9 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, * separately. */ - dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in dir ino %lu", - old_dentry->d_name.len, old_dentry->d_name.name, - old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, - new_dentry->d_name.name, new_dir->i_ino); + dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu", + old_dentry, old_inode->i_ino, old_dir->i_ino, + new_dentry, new_dir->i_ino); ubifs_assert(mutex_is_locked(&old_dir->i_mutex)); ubifs_assert(mutex_is_locked(&new_dir->i_mutex)); if (unlink) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index afaad07f3b2..0e045e75abd 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -933,10 +933,8 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, int move = (old_dir != new_dir); struct ubifs_inode *uninitialized_var(new_ui); - dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu", - old_dentry->d_name.len, old_dentry->d_name.name, - old_dir->i_ino, new_dentry->d_name.len, - new_dentry->d_name.name, new_dir->i_ino); + dbg_jnl("dent '%pd' in dir ino %lu to dent '%pd' in dir ino %lu", + old_dentry, old_dir->i_ino, new_dentry, new_dir->i_ino); ubifs_assert(ubifs_inode(old_dir)->data_len == 0); ubifs_assert(ubifs_inode(new_dir)->data_len == 0); ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex)); diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 0f7139bdb2c..5e0a63b1b0d 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -303,8 +303,8 @@ int ubifs_setxattr(struct dentry *dentry, const char *name, union ubifs_key key; int err, type; - dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, - host->i_ino, dentry->d_name.len, dentry->d_name.name, size); + dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name, + host->i_ino, dentry, size); ubifs_assert(mutex_is_locked(&host->i_mutex)); if (size > UBIFS_MAX_INO_DATA) @@ -367,8 +367,8 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, union ubifs_key key; int err; - dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name, - host->i_ino, dentry->d_name.len, dentry->d_name.name, size); + dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name, + host->i_ino, dentry, size); err = check_namespace(&nm); if (err < 0) @@ -426,8 +426,8 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) int err, len, written = 0; struct qstr nm = { .name = NULL }; - dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino, - dentry->d_name.len, dentry->d_name.name, size); + dbg_gen("ino %lu ('%pd'), buffer size %zd", host->i_ino, + dentry, size); len = host_ui->xattr_names + host_ui->xattr_cnt; if (!buffer) @@ -529,8 +529,8 @@ int ubifs_removexattr(struct dentry *dentry, const char *name) union ubifs_key key; int err; - dbg_gen("xattr '%s', ino %lu ('%.*s')", name, - host->i_ino, dentry->d_name.len, dentry->d_name.name); + dbg_gen("xattr '%s', ino %lu ('%pd')", name, + host->i_ino, dentry); ubifs_assert(mutex_is_locked(&host->i_mutex)); err = check_namespace(&nm); diff --git a/fs/utimes.c b/fs/utimes.c index f4fb7eca10e..aa138d64560 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -53,6 +53,7 @@ static int utimes_common(struct path *path, struct timespec *times) int error; struct iattr newattrs; struct inode *inode = path->dentry->d_inode; + struct inode *delegated_inode = NULL; error = mnt_want_write(path->mnt); if (error) @@ -101,9 +102,15 @@ static int utimes_common(struct path *path, struct timespec *times) goto mnt_drop_write_and_out; } } +retry_deleg: mutex_lock(&inode->i_mutex); - error = notify_change(path->dentry, &newattrs); + error = notify_change(path->dentry, &newattrs, &delegated_inode); mutex_unlock(&inode->i_mutex); + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } mnt_drop_write_and_out: mnt_drop_write(path->mnt); |