diff options
Diffstat (limited to 'fs')
109 files changed, 2252 insertions, 1430 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index fc06fd27065..dd6f7ee1e31 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -610,6 +610,9 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n", page, (unsigned long)filp->private_data); + /* Update file times before taking page lock */ + file_update_time(filp); + v9inode = V9FS_I(inode); /* make sure the cache has finished storing the page */ v9fs_fscache_wait_on_page_write(inode, page); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fadeba6a5db..62e0cafd6e2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1614,8 +1614,6 @@ static int cleaner_kthread(void *arg) struct btrfs_root *root = arg; do { - vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); - if (!(root->fs_info->sb->s_flags & MS_RDONLY) && mutex_trylock(&root->fs_info->cleaner_mutex)) { btrfs_run_delayed_iputs(root); @@ -1647,7 +1645,6 @@ static int transaction_kthread(void *arg) do { cannot_commit = false; delay = HZ * 30; - vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); spin_lock(&root->fs_info->trans_lock); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9aa01ec2138..5caf285c6e4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1379,7 +1379,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, ssize_t err = 0; size_t count, ocount; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + sb_start_write(inode->i_sb); mutex_lock(&inode->i_mutex); @@ -1469,6 +1469,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, num_written = err; } out: + sb_end_write(inode->i_sb); current->backing_dev_info = NULL; return num_written ? num_written : err; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 48bdfd2591c..83baec24946 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6629,6 +6629,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) u64 page_start; u64 page_end; + sb_start_pagefault(inode->i_sb); ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); if (!ret) { ret = file_update_time(vma->vm_file); @@ -6718,12 +6719,15 @@ again: unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); out_unlock: - if (!ret) + if (!ret) { + sb_end_pagefault(inode->i_sb); return VM_FAULT_LOCKED; + } unlock_page(page); out: btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); out_noreserve: + sb_end_pagefault(inode->i_sb); return ret; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 43f0012016e..bc2f6ffff3c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -195,6 +195,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) if (!inode_owner_or_capable(inode)) return -EACCES; + ret = mnt_want_write_file(file); + if (ret) + return ret; + mutex_lock(&inode->i_mutex); ip_oldflags = ip->flags; @@ -209,10 +213,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) } } - ret = mnt_want_write_file(file); - if (ret) - goto out_unlock; - if (flags & FS_SYNC_FL) ip->flags |= BTRFS_INODE_SYNC; else @@ -275,9 +275,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) inode->i_flags = i_oldflags; } - mnt_drop_write_file(file); out_unlock: mutex_unlock(&inode->i_mutex); + mnt_drop_write_file(file); return ret; } @@ -664,6 +664,10 @@ static noinline int btrfs_mksubvol(struct path *parent, struct dentry *dentry; int error; + error = mnt_want_write(parent->mnt); + if (error) + return error; + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(name, parent->dentry, namelen); @@ -699,6 +703,7 @@ out_dput: dput(dentry); out_unlock: mutex_unlock(&dir->i_mutex); + mnt_drop_write(parent->mnt); return error; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7ac7cdcc294..17be3dedacb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -335,6 +335,8 @@ again: if (!h) return ERR_PTR(-ENOMEM); + sb_start_intwrite(root->fs_info->sb); + if (may_wait_transaction(root, type)) wait_current_trans(root); @@ -345,6 +347,7 @@ again: } while (ret == -EBUSY); if (ret < 0) { + sb_end_intwrite(root->fs_info->sb); kmem_cache_free(btrfs_trans_handle_cachep, h); return ERR_PTR(ret); } @@ -548,6 +551,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_trans_release_metadata(trans, root); trans->block_rsv = NULL; + sb_end_intwrite(root->fs_info->sb); + if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && should_end_transaction(trans, root)) { trans->transaction->blocked = 1; @@ -1578,6 +1583,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); put_transaction(cur_trans); + sb_end_intwrite(root->fs_info->sb); + trace_btrfs_transaction_commit(root); btrfs_scrub_continue(root); diff --git a/fs/buffer.c b/fs/buffer.c index c7062c896d7..9f6d2e41281 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2306,8 +2306,8 @@ EXPORT_SYMBOL(block_commit_write); * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. * - * Direct callers of this function should call vfs_check_frozen() so that page - * fault does not busyloop until the fs is thawed. + * Direct callers of this function should protect against filesystem freezing + * using sb_start_write() - sb_end_write() functions. */ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) @@ -2318,6 +2318,12 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, loff_t size; int ret; + /* + * Update file times before taking page lock. We may end up failing the + * fault so this update may be superfluous but who really cares... + */ + file_update_time(vma->vm_file); + lock_page(page); size = i_size_read(inode); if ((page->mapping != inode->i_mapping) || @@ -2339,18 +2345,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, if (unlikely(ret < 0)) goto out_unlock; - /* - * Freezing in progress? We check after the page is marked dirty and - * with page lock held so if the test here fails, we are sure freezing - * code will wait during syncing until the page fault is done - at that - * point page will be dirty and unlocked so freezing code will write it - * and writeprotect it again. - */ set_page_dirty(page); - if (inode->i_sb->s_frozen != SB_UNFROZEN) { - ret = -EAGAIN; - goto out_unlock; - } wait_on_page_writeback(page); return 0; out_unlock: @@ -2365,12 +2360,9 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, int ret; struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; - /* - * This check is racy but catches the common case. The check in - * __block_page_mkwrite() is reliable. - */ - vfs_check_frozen(sb, SB_FREEZE_WRITE); + sb_start_pagefault(sb); ret = __block_page_mkwrite(vma, vmf, get_block); + sb_end_pagefault(sb); return block_page_mkwrite_return(ret); } EXPORT_SYMBOL(block_page_mkwrite); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8b67304e4b8..452e71a1b75 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1184,6 +1184,9 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) loff_t size, len; int ret; + /* Update time before taking page lock */ + file_update_time(vma->vm_file); + size = i_size_read(inode); if (off + PAGE_CACHE_SIZE <= size) len = PAGE_CACHE_SIZE; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f391f1e7541..e5b77319c97 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -633,44 +633,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, return dentry; } -int ceph_atomic_open(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned flags, umode_t mode, - int *opened) -{ - int err; - struct dentry *res = NULL; - - if (!(flags & O_CREAT)) { - if (dentry->d_name.len > NAME_MAX) - return -ENAMETOOLONG; - - err = ceph_init_dentry(dentry); - if (err < 0) - return err; - - return ceph_lookup_open(dir, dentry, file, flags, mode, opened); - } - - if (d_unhashed(dentry)) { - res = ceph_lookup(dir, dentry, 0); - if (IS_ERR(res)) - return PTR_ERR(res); - - if (res) - dentry = res; - } - - /* We don't deal with positive dentries here */ - if (dentry->d_inode) - return finish_no_open(file, res); - - *opened |= FILE_CREATED; - err = ceph_lookup_open(dir, dentry, file, flags, mode, opened); - dput(res); - - return err; -} - /* * If we do a create but get no trace back from the MDS, follow up with * a lookup (the VFS expects us to link up the provided dentry). diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 1b81d6c3187..ecebbc09bfc 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -4,6 +4,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/file.h> +#include <linux/mount.h> #include <linux/namei.h> #include <linux/writeback.h> @@ -106,9 +107,6 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) } /* - * If the filp already has private_data, that means the file was - * already opened by intent during lookup, and we do nothing. - * * If we already have the requisite capabilities, we can satisfy * the open request locally (no need to request new caps from the * MDS). We do, however, need to inform the MDS (asynchronously) @@ -207,24 +205,29 @@ out: /* - * Do a lookup + open with a single request. - * - * If this succeeds, but some subsequent check in the vfs - * may_open() fails, the struct *file gets cleaned up (i.e. - * ceph_release gets called). So fear not! + * Do a lookup + open with a single request. If we get a non-existent + * file or symlink, return 1 so the VFS can retry. */ -int ceph_lookup_open(struct inode *dir, struct dentry *dentry, +int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode, int *opened) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; - struct dentry *ret; + struct dentry *dn; int err; - dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n", - dentry, dentry->d_name.len, dentry->d_name.name, flags, mode); + dout("atomic_open %p dentry %p '%.*s' %s flags %d mode 0%o\n", + dir, dentry, dentry->d_name.len, dentry->d_name.name, + d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); + + if (dentry->d_name.len > NAME_MAX) + return -ENAMETOOLONG; + + err = ceph_init_dentry(dentry); + if (err < 0) + return err; /* do the open */ req = prepare_open_request(dir->i_sb, flags, mode); @@ -241,22 +244,31 @@ int ceph_lookup_open(struct inode *dir, struct dentry *dentry, (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, req); err = ceph_handle_snapdir(req, dentry, err); - if (err) - goto out; - if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry) + if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); - if (err) - goto out; - err = finish_open(file, req->r_dentry, ceph_open, opened); -out: - ret = ceph_finish_lookup(req, dentry, err); - ceph_mdsc_put_request(req); - dout("ceph_lookup_open result=%p\n", ret); - if (IS_ERR(ret)) - return PTR_ERR(ret); + if (d_unhashed(dentry)) { + dn = ceph_finish_lookup(req, dentry, err); + if (IS_ERR(dn)) + err = PTR_ERR(dn); + } else { + /* we were given a hashed negative dentry */ + dn = NULL; + } + if (err) + goto out_err; + if (dn || dentry->d_inode == NULL || S_ISLNK(dentry->d_inode->i_mode)) { + /* make vfs retry on splice, ENOENT, or symlink */ + dout("atomic_open finish_no_open on dn %p\n", dn); + err = finish_no_open(file, dn); + } else { + dout("atomic_open finish_open on dn %p\n", dn); + err = finish_open(file, dentry, ceph_open, opened); + } - dput(ret); +out_err: + ceph_mdsc_put_request(req); + dout("atomic_open result=%d\n", err); return err; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ebc95cc652b..66ebe720e40 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -806,9 +806,9 @@ extern int ceph_copy_from_page_vector(struct page **pages, loff_t off, size_t len); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_open(struct inode *inode, struct file *file); -extern int ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct file *od, unsigned flags, - umode_t mode, int *opened); +extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, + struct file *file, unsigned flags, umode_t mode, + int *opened); extern int ceph_release(struct inode *inode, struct file *filp); /* dir.c */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 497da5ce704..977dc0e85cc 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -246,6 +246,16 @@ struct smb_version_operations { bool (*can_echo)(struct TCP_Server_Info *); /* send echo request */ int (*echo)(struct TCP_Server_Info *); + /* create directory */ + int (*mkdir)(const unsigned int, struct cifs_tcon *, const char *, + struct cifs_sb_info *); + /* set info on created directory */ + void (*mkdir_setinfo)(struct inode *, const char *, + struct cifs_sb_info *, struct cifs_tcon *, + const unsigned int); + /* remove directory */ + int (*rmdir)(const unsigned int, struct cifs_tcon *, const char *, + struct cifs_sb_info *); }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index cf7fb185103..f1bbf8305d3 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -289,18 +289,15 @@ extern int CIFSSMBUnixSetFileInfo(const unsigned int xid, u16 fid, u32 pid_of_opener); extern int CIFSSMBUnixSetPathInfo(const unsigned int xid, - struct cifs_tcon *tcon, char *file_name, + struct cifs_tcon *tcon, const char *file_name, const struct cifs_unix_set_info_args *args, const struct nls_table *nls_codepage, - int remap_special_chars); + int remap); extern int CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, - const char *newName, - const struct nls_table *nls_codepage, - int remap_special_chars); + const char *name, struct cifs_sb_info *cifs_sb); extern int CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, - const char *name, const struct nls_table *nls_codepage, - int remap_special_chars); + const char *name, struct cifs_sb_info *cifs_sb); extern int CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name, __u16 type, const struct nls_table *nls_codepage, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index cabc7a01f5d..074923ce593 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -948,15 +948,15 @@ DelFileRetry: } int -CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, - const char *dirName, const struct nls_table *nls_codepage, - int remap) +CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *cifs_sb) { DELETE_DIRECTORY_REQ *pSMB = NULL; DELETE_DIRECTORY_RSP *pSMBr = NULL; int rc = 0; int bytes_returned; int name_len; + int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; cFYI(1, "In CIFSSMBRmDir"); RmDirRetry: @@ -966,14 +966,15 @@ RmDirRetry: return rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, dirName, - PATH_MAX, nls_codepage, remap); + name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, name, + PATH_MAX, cifs_sb->local_nls, + remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve check for buffer overruns BB */ - name_len = strnlen(dirName, PATH_MAX); + name_len = strnlen(name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->DirName, dirName, name_len); + strncpy(pSMB->DirName, name, name_len); } pSMB->BufferFormat = 0x04; @@ -992,14 +993,15 @@ RmDirRetry: } int -CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, - const char *name, const struct nls_table *nls_codepage, int remap) +CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *cifs_sb) { int rc = 0; CREATE_DIRECTORY_REQ *pSMB = NULL; CREATE_DIRECTORY_RSP *pSMBr = NULL; int bytes_returned; int name_len; + int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; cFYI(1, "In CIFSSMBMkDir"); MkDirRetry: @@ -1010,7 +1012,8 @@ MkDirRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, name, - PATH_MAX, nls_codepage, remap); + PATH_MAX, cifs_sb->local_nls, + remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve check for buffer overruns BB */ @@ -5943,7 +5946,7 @@ CIFSSMBUnixSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, int CIFSSMBUnixSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, - char *fileName, + const char *file_name, const struct cifs_unix_set_info_args *args, const struct nls_table *nls_codepage, int remap) { @@ -5964,14 +5967,14 @@ setPermsRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, + cifsConvertToUTF16((__le16 *) pSMB->FileName, file_name, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fileName, PATH_MAX); + name_len = strnlen(file_name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->FileName, fileName, name_len); + strncpy(pSMB->FileName, file_name, name_len); } params = 6 + name_len; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 35cb6a374a4..7354877fa3b 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1219,16 +1219,153 @@ unlink_out: return rc; } +static int +cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, + const char *full_path, struct cifs_sb_info *cifs_sb, + struct cifs_tcon *tcon, const unsigned int xid) +{ + int rc = 0; + struct inode *newinode = NULL; + + if (tcon->unix_ext) + rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, + xid); + else + rc = cifs_get_inode_info(&newinode, full_path, NULL, + inode->i_sb, xid, NULL); + if (rc) + return rc; + + d_instantiate(dentry, newinode); + /* + * setting nlink not necessary except in cases where we failed to get it + * from the server or was set bogus + */ + if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2)) + set_nlink(dentry->d_inode, 2); + + mode &= ~current_umask(); + /* must turn on setgid bit if parent dir has it */ + if (inode->i_mode & S_ISGID) + mode |= S_ISGID; + + if (tcon->unix_ext) { + struct cifs_unix_set_info_args args = { + .mode = mode, + .ctime = NO_CHANGE_64, + .atime = NO_CHANGE_64, + .mtime = NO_CHANGE_64, + .device = 0, + }; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + args.uid = (__u64)current_fsuid(); + if (inode->i_mode & S_ISGID) + args.gid = (__u64)inode->i_gid; + else + args.gid = (__u64)current_fsgid(); + } else { + args.uid = NO_CHANGE_64; + args.gid = NO_CHANGE_64; + } + CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + } else { + struct TCP_Server_Info *server = tcon->ses->server; + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && + (mode & S_IWUGO) == 0 && server->ops->mkdir_setinfo) + server->ops->mkdir_setinfo(newinode, full_path, cifs_sb, + tcon, xid); + if (dentry->d_inode) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + dentry->d_inode->i_mode = (mode | S_IFDIR); + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + dentry->d_inode->i_uid = current_fsuid(); + if (inode->i_mode & S_ISGID) + dentry->d_inode->i_gid = inode->i_gid; + else + dentry->d_inode->i_gid = + current_fsgid(); + } + } + } + return rc; +} + +static int +cifs_posix_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode, + const char *full_path, struct cifs_sb_info *cifs_sb, + struct cifs_tcon *tcon, const unsigned int xid) +{ + int rc = 0; + u32 oplock = 0; + FILE_UNIX_BASIC_INFO *info = NULL; + struct inode *newinode = NULL; + struct cifs_fattr fattr; + + info = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); + if (info == NULL) { + rc = -ENOMEM; + goto posix_mkdir_out; + } + + mode &= ~current_umask(); + rc = CIFSPOSIXCreate(xid, tcon, SMB_O_DIRECTORY | SMB_O_CREAT, mode, + NULL /* netfid */, info, &oplock, full_path, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc == -EOPNOTSUPP) + goto posix_mkdir_out; + else if (rc) { + cFYI(1, "posix mkdir returned 0x%x", rc); + d_drop(dentry); + goto posix_mkdir_out; + } + + if (info->Type == cpu_to_le32(-1)) + /* no return info, go query for it */ + goto posix_mkdir_get_info; + /* + * BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if + * need to set uid/gid. + */ + + cifs_unix_basic_to_fattr(&fattr, info, cifs_sb); + cifs_fill_uniqueid(inode->i_sb, &fattr); + newinode = cifs_iget(inode->i_sb, &fattr); + if (!newinode) + goto posix_mkdir_get_info; + + d_instantiate(dentry, newinode); + +#ifdef CONFIG_CIFS_DEBUG2 + cFYI(1, "instantiated dentry %p %s to inode %p", dentry, + dentry->d_name.name, newinode); + + if (newinode->i_nlink != 2) + cFYI(1, "unexpected number of links %d", newinode->i_nlink); +#endif + +posix_mkdir_out: + kfree(info); + return rc; +posix_mkdir_get_info: + rc = cifs_mkdir_qinfo(inode, dentry, mode, full_path, cifs_sb, tcon, + xid); + goto posix_mkdir_out; +} + int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) { - int rc = 0, tmprc; + int rc = 0; unsigned int xid; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *tcon; - char *full_path = NULL; - struct inode *newinode = NULL; - struct cifs_fattr fattr; + struct TCP_Server_Info *server; + char *full_path; cFYI(1, "In cifs_mkdir, mode = 0x%hx inode = 0x%p", mode, inode); @@ -1248,145 +1385,29 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { - u32 oplock = 0; - FILE_UNIX_BASIC_INFO *pInfo = - kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); - if (pInfo == NULL) { - rc = -ENOMEM; + rc = cifs_posix_mkdir(inode, direntry, mode, full_path, cifs_sb, + tcon, xid); + if (rc != -EOPNOTSUPP) goto mkdir_out; - } - - mode &= ~current_umask(); - rc = CIFSPOSIXCreate(xid, tcon, SMB_O_DIRECTORY | SMB_O_CREAT, - mode, NULL /* netfid */, pInfo, &oplock, - full_path, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == -EOPNOTSUPP) { - kfree(pInfo); - goto mkdir_retry_old; - } else if (rc) { - cFYI(1, "posix mkdir returned 0x%x", rc); - d_drop(direntry); - } else { - if (pInfo->Type == cpu_to_le32(-1)) { - /* no return info, go query for it */ - kfree(pInfo); - goto mkdir_get_info; - } -/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need - to set uid/gid */ - - cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); - cifs_fill_uniqueid(inode->i_sb, &fattr); - newinode = cifs_iget(inode->i_sb, &fattr); - if (!newinode) { - kfree(pInfo); - goto mkdir_get_info; - } - - d_instantiate(direntry, newinode); + } -#ifdef CONFIG_CIFS_DEBUG2 - cFYI(1, "instantiated dentry %p %s to inode %p", - direntry, direntry->d_name.name, newinode); + server = tcon->ses->server; - if (newinode->i_nlink != 2) - cFYI(1, "unexpected number of links %d", - newinode->i_nlink); -#endif - } - kfree(pInfo); + if (!server->ops->mkdir) { + rc = -ENOSYS; goto mkdir_out; } -mkdir_retry_old: + /* BB add setting the equivalent of mode via CreateX w/ACLs */ - rc = CIFSSMBMkDir(xid, tcon, full_path, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + rc = server->ops->mkdir(xid, tcon, full_path, cifs_sb); if (rc) { cFYI(1, "cifs_mkdir returned 0x%x", rc); d_drop(direntry); - } else { -mkdir_get_info: - if (tcon->unix_ext) - rc = cifs_get_inode_info_unix(&newinode, full_path, - inode->i_sb, xid); - else - rc = cifs_get_inode_info(&newinode, full_path, NULL, - inode->i_sb, xid, NULL); - - d_instantiate(direntry, newinode); - /* setting nlink not necessary except in cases where we - * failed to get it from the server or was set bogus */ - if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) - set_nlink(direntry->d_inode, 2); - - mode &= ~current_umask(); - /* must turn on setgid bit if parent dir has it */ - if (inode->i_mode & S_ISGID) - mode |= S_ISGID; - - if (tcon->unix_ext) { - struct cifs_unix_set_info_args args = { - .mode = mode, - .ctime = NO_CHANGE_64, - .atime = NO_CHANGE_64, - .mtime = NO_CHANGE_64, - .device = 0, - }; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { - args.uid = (__u64)current_fsuid(); - if (inode->i_mode & S_ISGID) - args.gid = (__u64)inode->i_gid; - else - args.gid = (__u64)current_fsgid(); - } else { - args.uid = NO_CHANGE_64; - args.gid = NO_CHANGE_64; - } - CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - } else { - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && - (mode & S_IWUGO) == 0) { - FILE_BASIC_INFO pInfo; - struct cifsInodeInfo *cifsInode; - u32 dosattrs; - - memset(&pInfo, 0, sizeof(pInfo)); - cifsInode = CIFS_I(newinode); - dosattrs = cifsInode->cifsAttrs|ATTR_READONLY; - pInfo.Attributes = cpu_to_le32(dosattrs); - tmprc = CIFSSMBSetPathInfo(xid, tcon, - full_path, &pInfo, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (tmprc == 0) - cifsInode->cifsAttrs = dosattrs; - } - if (direntry->d_inode) { - if (cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_DYNPERM) - direntry->d_inode->i_mode = - (mode | S_IFDIR); - - if (cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_SET_UID) { - direntry->d_inode->i_uid = - current_fsuid(); - if (inode->i_mode & S_ISGID) - direntry->d_inode->i_gid = - inode->i_gid; - else - direntry->d_inode->i_gid = - current_fsgid(); - } - } - } + goto mkdir_out; } + + rc = cifs_mkdir_qinfo(inode, direntry, mode, full_path, cifs_sb, tcon, + xid); mkdir_out: /* * Force revalidate to get parent dir info when needed since cached @@ -1405,7 +1426,8 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) unsigned int xid; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; char *full_path = NULL; struct cifsInodeInfo *cifsInode; @@ -1425,10 +1447,16 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) rc = PTR_ERR(tlink); goto rmdir_exit; } - pTcon = tlink_tcon(tlink); + tcon = tlink_tcon(tlink); + server = tcon->ses->server; + + if (!server->ops->rmdir) { + rc = -ENOSYS; + cifs_put_tlink(tlink); + goto rmdir_exit; + } - rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + rc = server->ops->rmdir(xid, tcon, full_path, cifs_sb); cifs_put_tlink(tlink); if (!rc) { diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index c40356d24c5..3129ac74b81 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -586,6 +586,27 @@ cifs_print_stats(struct seq_file *m, struct cifs_tcon *tcon) #endif } +static void +cifs_mkdir_setinfo(struct inode *inode, const char *full_path, + struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, + const unsigned int xid) +{ + FILE_BASIC_INFO info; + struct cifsInodeInfo *cifsInode; + u32 dosattrs; + int rc; + + memset(&info, 0, sizeof(info)); + cifsInode = CIFS_I(inode); + dosattrs = cifsInode->cifsAttrs|ATTR_READONLY; + info.Attributes = cpu_to_le32(dosattrs); + rc = CIFSSMBSetPathInfo(xid, tcon, full_path, &info, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc == 0) + cifsInode->cifsAttrs = dosattrs; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -620,6 +641,9 @@ struct smb_version_operations smb1_operations = { .get_srv_inum = cifs_get_srv_inum, .build_path_to_root = cifs_build_path_to_root, .echo = CIFSSMBEcho, + .mkdir = CIFSSMBMkDir, + .mkdir_setinfo = cifs_mkdir_setinfo, + .rmdir = CIFSSMBRmDir, }; struct smb_version_values smb1_values = { diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 1ba5c405315..2aa5cb08c52 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -122,3 +122,42 @@ out: kfree(smb2_data); return rc; } + +int +smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *cifs_sb) +{ + return smb2_open_op_close(xid, tcon, cifs_sb, name, + FILE_WRITE_ATTRIBUTES, FILE_CREATE, 0, + CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR); +} + +void +smb2_mkdir_setinfo(struct inode *inode, const char *name, + struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, + const unsigned int xid) +{ + FILE_BASIC_INFO data; + struct cifsInodeInfo *cifs_i; + u32 dosattrs; + int tmprc; + + memset(&data, 0, sizeof(data)); + cifs_i = CIFS_I(inode); + dosattrs = cifs_i->cifsAttrs | ATTR_READONLY; + data.Attributes = cpu_to_le32(dosattrs); + tmprc = smb2_open_op_close(xid, tcon, cifs_sb, name, + FILE_WRITE_ATTRIBUTES, FILE_CREATE, 0, + CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO); + if (tmprc == 0) + cifs_i->cifsAttrs = dosattrs; +} + +int +smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *cifs_sb) +{ + return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, + 0, CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE, + NULL, SMB2_OP_DELETE); +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 410cf925ea2..826209bf368 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -318,6 +318,9 @@ struct smb_version_operations smb21_operations = { .query_path_info = smb2_query_path_info, .get_srv_inum = smb2_get_srv_inum, .build_path_to_root = smb2_build_path_to_root, + .mkdir = smb2_mkdir, + .mkdir_setinfo = smb2_mkdir_setinfo, + .rmdir = smb2_rmdir, }; struct smb_version_values smb21_values = { diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 902bbe2b5ad..bfaa7b148af 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -52,6 +52,14 @@ extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, FILE_ALL_INFO *data, bool *adjust_tz); +extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, + const char *name, struct cifs_sb_info *cifs_sb); +extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, + struct cifs_sb_info *cifs_sb, + struct cifs_tcon *tcon, const unsigned int xid); +extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, + const char *name, struct cifs_sb_info *cifs_sb); + /* * SMB2 Worker functions - most of protocol specific implementation details * are contained within these calls. diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 989e034f02b..cfb4b9fed52 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -385,8 +385,6 @@ struct ecryptfs_msg_ctx { struct mutex mux; }; -struct ecryptfs_daemon; - struct ecryptfs_daemon { #define ECRYPTFS_DAEMON_IN_READ 0x00000001 #define ECRYPTFS_DAEMON_IN_POLL 0x00000002 @@ -394,10 +392,7 @@ struct ecryptfs_daemon { #define ECRYPTFS_DAEMON_MISCDEV_OPEN 0x00000008 u32 flags; u32 num_queued_msg_ctx; - struct pid *pid; - uid_t euid; - struct user_namespace *user_ns; - struct task_struct *task; + struct file *file; struct mutex mux; struct list_head msg_ctx_out_queue; wait_queue_head_t wait; @@ -554,6 +549,8 @@ extern struct kmem_cache *ecryptfs_key_tfm_cache; struct inode *ecryptfs_get_inode(struct inode *lower_inode, struct super_block *sb); void ecryptfs_i_size_init(const char *page_virt, struct inode *inode); +int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, + struct inode *ecryptfs_inode); int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, size_t *decrypted_name_size, struct dentry *ecryptfs_dentry, @@ -607,13 +604,8 @@ int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); -int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns, - struct pid *pid); -int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, - struct pid *pid); -int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, - struct user_namespace *user_ns, struct pid *pid, - u32 seq); +int ecryptfs_process_response(struct ecryptfs_daemon *daemon, + struct ecryptfs_message *msg, u32 seq); int ecryptfs_send_message(char *data, int data_len, struct ecryptfs_msg_ctx **msg_ctx); int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, @@ -658,8 +650,7 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, struct inode *ecryptfs_inode); struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index); int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); -int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, - struct user_namespace *user_ns); +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon); int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, size_t *length_size); int ecryptfs_write_packet_length(char *dest, size_t size, @@ -671,8 +662,7 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, u16 msg_flags, struct ecryptfs_daemon *daemon); void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx); int -ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, - struct user_namespace *user_ns, struct pid *pid); +ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file); int ecryptfs_init_kthread(void); void ecryptfs_destroy_kthread(void); int ecryptfs_privileged_open(struct file **lower_file, diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 2b17f2f9b12..44ce5c6a541 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -138,29 +138,50 @@ out: return rc; } -static void ecryptfs_vma_close(struct vm_area_struct *vma) -{ - filemap_write_and_wait(vma->vm_file->f_mapping); -} - -static const struct vm_operations_struct ecryptfs_file_vm_ops = { - .close = ecryptfs_vma_close, - .fault = filemap_fault, -}; +struct kmem_cache *ecryptfs_file_info_cache; -static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int read_or_initialize_metadata(struct dentry *dentry) { + struct inode *inode = dentry->d_inode; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat; + struct ecryptfs_crypt_stat *crypt_stat; int rc; - rc = generic_file_mmap(file, vma); + crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; + mount_crypt_stat = &ecryptfs_superblock_to_private( + inode->i_sb)->mount_crypt_stat; + mutex_lock(&crypt_stat->cs_mutex); + + if (crypt_stat->flags & ECRYPTFS_POLICY_APPLIED && + crypt_stat->flags & ECRYPTFS_KEY_VALID) { + rc = 0; + goto out; + } + + rc = ecryptfs_read_metadata(dentry); if (!rc) - vma->vm_ops = &ecryptfs_file_vm_ops; + goto out; + + if (mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED) { + crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED + | ECRYPTFS_ENCRYPTED); + rc = 0; + goto out; + } + if (!(mount_crypt_stat->flags & ECRYPTFS_XATTR_METADATA_ENABLED) && + !i_size_read(ecryptfs_inode_to_lower(inode))) { + rc = ecryptfs_initialize_file(dentry, inode); + if (!rc) + goto out; + } + + rc = -EIO; +out: + mutex_unlock(&crypt_stat->cs_mutex); return rc; } -struct kmem_cache *ecryptfs_file_info_cache; - /** * ecryptfs_open * @inode: inode speciying file to open @@ -236,32 +257,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file) rc = 0; goto out; } - mutex_lock(&crypt_stat->cs_mutex); - if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED) - || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) { - rc = ecryptfs_read_metadata(ecryptfs_dentry); - if (rc) { - ecryptfs_printk(KERN_DEBUG, - "Valid headers not found\n"); - if (!(mount_crypt_stat->flags - & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { - rc = -EIO; - printk(KERN_WARNING "Either the lower file " - "is not in a valid eCryptfs format, " - "or the key could not be retrieved. " - "Plaintext passthrough mode is not " - "enabled; returning -EIO\n"); - mutex_unlock(&crypt_stat->cs_mutex); - goto out_put; - } - rc = 0; - crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED - | ECRYPTFS_ENCRYPTED); - mutex_unlock(&crypt_stat->cs_mutex); - goto out; - } - } - mutex_unlock(&crypt_stat->cs_mutex); + rc = read_or_initialize_metadata(ecryptfs_dentry); + if (rc) + goto out_put; ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = " "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, (unsigned long long)i_size_read(inode)); @@ -292,15 +290,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file) static int ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - int rc = 0; - - rc = generic_file_fsync(file, start, end, datasync); - if (rc) - goto out; - rc = vfs_fsync_range(ecryptfs_file_to_lower(file), start, end, - datasync); -out: - return rc; + return vfs_fsync(ecryptfs_file_to_lower(file), datasync); } static int ecryptfs_fasync(int fd, struct file *file, int flag) @@ -369,7 +359,7 @@ const struct file_operations ecryptfs_main_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = ecryptfs_compat_ioctl, #endif - .mmap = ecryptfs_file_mmap, + .mmap = generic_file_mmap, .open = ecryptfs_open, .flush = ecryptfs_flush, .release = ecryptfs_release, diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index ffa2be57804..534b129ea67 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -143,6 +143,31 @@ static int ecryptfs_interpose(struct dentry *lower_dentry, return 0; } +static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, + struct inode *inode) +{ + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); + struct dentry *lower_dir_dentry; + int rc; + + dget(lower_dentry); + lower_dir_dentry = lock_parent(lower_dentry); + rc = vfs_unlink(lower_dir_inode, lower_dentry); + if (rc) { + printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); + goto out_unlock; + } + fsstack_copy_attr_times(dir, lower_dir_inode); + set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink); + inode->i_ctime = dir->i_ctime; + d_drop(dentry); +out_unlock: + unlock_dir(lower_dir_dentry); + dput(lower_dentry); + return rc; +} + /** * ecryptfs_do_create * @directory_inode: inode of the new file's dentry's parent in ecryptfs @@ -182,8 +207,10 @@ ecryptfs_do_create(struct inode *directory_inode, } inode = __ecryptfs_get_inode(lower_dentry->d_inode, directory_inode->i_sb); - if (IS_ERR(inode)) + if (IS_ERR(inode)) { + vfs_unlink(lower_dir_dentry->d_inode, lower_dentry); goto out_lock; + } fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode); fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode); out_lock: @@ -200,8 +227,8 @@ out: * * Returns zero on success */ -static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, - struct inode *ecryptfs_inode) +int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, + struct inode *ecryptfs_inode) { struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; @@ -264,7 +291,9 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, * that this on disk file is prepared to be an ecryptfs file */ rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode); if (rc) { - drop_nlink(ecryptfs_inode); + ecryptfs_do_unlink(directory_inode, ecryptfs_dentry, + ecryptfs_inode); + make_bad_inode(ecryptfs_inode); unlock_new_inode(ecryptfs_inode); iput(ecryptfs_inode); goto out; @@ -318,21 +347,20 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry, struct vfsmount *lower_mnt; int rc = 0; - lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); - fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode); - BUG_ON(!lower_dentry->d_count); - dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); - ecryptfs_set_dentry_private(dentry, dentry_info); if (!dentry_info) { printk(KERN_ERR "%s: Out of memory whilst attempting " "to allocate ecryptfs_dentry_info struct\n", __func__); dput(lower_dentry); - mntput(lower_mnt); - d_drop(dentry); return -ENOMEM; } + + lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); + fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode); + BUG_ON(!lower_dentry->d_count); + + ecryptfs_set_dentry_private(dentry, dentry_info); ecryptfs_set_dentry_lower(dentry, lower_dentry); ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); @@ -381,12 +409,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, struct dentry *lower_dir_dentry, *lower_dentry; int rc = 0; - if ((ecryptfs_dentry->d_name.len == 1 - && !strcmp(ecryptfs_dentry->d_name.name, ".")) - || (ecryptfs_dentry->d_name.len == 2 - && !strcmp(ecryptfs_dentry->d_name.name, ".."))) { - goto out_d_drop; - } lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); mutex_lock(&lower_dir_dentry->d_inode->i_mutex); lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, @@ -397,8 +419,8 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, rc = PTR_ERR(lower_dentry); ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " "[%d] on lower_dentry = [%s]\n", __func__, rc, - encrypted_and_encoded_name); - goto out_d_drop; + ecryptfs_dentry->d_name.name); + goto out; } if (lower_dentry->d_inode) goto interpose; @@ -415,7 +437,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, if (rc) { printk(KERN_ERR "%s: Error attempting to encrypt and encode " "filename; rc = [%d]\n", __func__, rc); - goto out_d_drop; + goto out; } mutex_lock(&lower_dir_dentry->d_inode->i_mutex); lower_dentry = lookup_one_len(encrypted_and_encoded_name, @@ -427,14 +449,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " "[%d] on lower_dentry = [%s]\n", __func__, rc, encrypted_and_encoded_name); - goto out_d_drop; + goto out; } interpose: rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry, ecryptfs_dir_inode); - goto out; -out_d_drop: - d_drop(ecryptfs_dentry); out: kfree(encrypted_and_encoded_name); return ERR_PTR(rc); @@ -476,27 +495,7 @@ out_lock: static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) { - int rc = 0; - struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); - struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); - struct dentry *lower_dir_dentry; - - dget(lower_dentry); - lower_dir_dentry = lock_parent(lower_dentry); - rc = vfs_unlink(lower_dir_inode, lower_dentry); - if (rc) { - printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); - goto out_unlock; - } - fsstack_copy_attr_times(dir, lower_dir_inode); - set_nlink(dentry->d_inode, - ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink); - dentry->d_inode->i_ctime = dir->i_ctime; - d_drop(dentry); -out_unlock: - unlock_dir(lower_dir_dentry); - dput(lower_dentry); - return rc; + return ecryptfs_do_unlink(dir, dentry, dentry->d_inode); } static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, @@ -971,12 +970,6 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) goto out; } - if (S_ISREG(inode->i_mode)) { - rc = filemap_write_and_wait(inode->i_mapping); - if (rc) - goto out; - fsstack_copy_attr_all(inode, lower_inode); - } memcpy(&lower_ia, ia, sizeof(lower_ia)); if (ia->ia_valid & ATTR_FILE) lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 1c0b3b6b75c..2768138eefe 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -279,6 +279,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, char *fnek_src; char *cipher_key_bytes_src; char *fn_cipher_key_bytes_src; + u8 cipher_code; *check_ruid = 0; @@ -420,6 +421,18 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, && !fn_cipher_key_bytes_set) mount_crypt_stat->global_default_fn_cipher_key_bytes = mount_crypt_stat->global_default_cipher_key_size; + + cipher_code = ecryptfs_code_for_cipher_string( + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_key_size); + if (!cipher_code) { + ecryptfs_printk(KERN_ERR, + "eCryptfs doesn't support cipher: %s", + mount_crypt_stat->global_default_cipher_name); + rc = -EINVAL; + goto out; + } + mutex_lock(&key_tfm_list_mutex); if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name, NULL)) { @@ -540,6 +553,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags } ecryptfs_set_superblock_lower(s, path.dentry->d_sb); + + /** + * Set the POSIX ACL flag based on whether they're enabled in the lower + * mount. Force a read-only eCryptfs mount if the lower mount is ro. + * Allow a ro eCryptfs mount even when the lower mount is rw. + */ + s->s_flags = flags & ~MS_POSIXACL; + s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL); + s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; s->s_magic = ECRYPTFS_SUPER_MAGIC; diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index a750f957b14..b29bb8bfa8d 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -32,8 +32,8 @@ static struct mutex ecryptfs_msg_ctx_lists_mux; static struct hlist_head *ecryptfs_daemon_hash; struct mutex ecryptfs_daemon_hash_mux; static int ecryptfs_hash_bits; -#define ecryptfs_uid_hash(uid) \ - hash_long((unsigned long)uid, ecryptfs_hash_bits) +#define ecryptfs_current_euid_hash(uid) \ + hash_long((unsigned long)current_euid(), ecryptfs_hash_bits) static u32 ecryptfs_msg_counter; static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; @@ -105,26 +105,24 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) /** * ecryptfs_find_daemon_by_euid - * @euid: The effective user id which maps to the desired daemon id - * @user_ns: The namespace in which @euid applies * @daemon: If return value is zero, points to the desired daemon pointer * * Must be called with ecryptfs_daemon_hash_mux held. * - * Search the hash list for the given user id. + * Search the hash list for the current effective user id. * * Returns zero if the user id exists in the list; non-zero otherwise. */ -int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, - struct user_namespace *user_ns) +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon) { struct hlist_node *elem; int rc; hlist_for_each_entry(*daemon, elem, - &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)], - euid_chain) { - if ((*daemon)->euid == euid && (*daemon)->user_ns == user_ns) { + &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()], + euid_chain) { + if ((*daemon)->file->f_cred->euid == current_euid() && + (*daemon)->file->f_cred->user_ns == current_user_ns()) { rc = 0; goto out; } @@ -137,9 +135,7 @@ out: /** * ecryptfs_spawn_daemon - Create and initialize a new daemon struct * @daemon: Pointer to set to newly allocated daemon struct - * @euid: Effective user id for the daemon - * @user_ns: The namespace in which @euid applies - * @pid: Process id for the daemon + * @file: File used when opening /dev/ecryptfs * * Must be called ceremoniously while in possession of * ecryptfs_sacred_daemon_hash_mux @@ -147,8 +143,7 @@ out: * Returns zero on success; non-zero otherwise */ int -ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, - struct user_namespace *user_ns, struct pid *pid) +ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file) { int rc = 0; @@ -159,16 +154,13 @@ ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, "GFP_KERNEL memory\n", __func__, sizeof(**daemon)); goto out; } - (*daemon)->euid = euid; - (*daemon)->user_ns = get_user_ns(user_ns); - (*daemon)->pid = get_pid(pid); - (*daemon)->task = current; + (*daemon)->file = file; mutex_init(&(*daemon)->mux); INIT_LIST_HEAD(&(*daemon)->msg_ctx_out_queue); init_waitqueue_head(&(*daemon)->wait); (*daemon)->num_queued_msg_ctx = 0; hlist_add_head(&(*daemon)->euid_chain, - &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)]); + &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()]); out: return rc; } @@ -188,9 +180,6 @@ int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon) if ((daemon->flags & ECRYPTFS_DAEMON_IN_READ) || (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)) { rc = -EBUSY; - printk(KERN_WARNING "%s: Attempt to destroy daemon with pid " - "[0x%p], but it is in the midst of a read or a poll\n", - __func__, daemon->pid); mutex_unlock(&daemon->mux); goto out; } @@ -203,12 +192,6 @@ int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon) ecryptfs_msg_ctx_alloc_to_free(msg_ctx); } hlist_del(&daemon->euid_chain); - if (daemon->task) - wake_up_process(daemon->task); - if (daemon->pid) - put_pid(daemon->pid); - if (daemon->user_ns) - put_user_ns(daemon->user_ns); mutex_unlock(&daemon->mux); kzfree(daemon); out: @@ -216,42 +199,9 @@ out: } /** - * ecryptfs_process_quit - * @euid: The user ID owner of the message - * @user_ns: The namespace in which @euid applies - * @pid: The process ID for the userspace program that sent the - * message - * - * Deletes the corresponding daemon for the given euid and pid, if - * it is the registered that is requesting the deletion. Returns zero - * after deleting the desired daemon; non-zero otherwise. - */ -int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, - struct pid *pid) -{ - struct ecryptfs_daemon *daemon; - int rc; - - mutex_lock(&ecryptfs_daemon_hash_mux); - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, user_ns); - if (rc || !daemon) { - rc = -EINVAL; - printk(KERN_ERR "Received request from user [%d] to " - "unregister unrecognized daemon [0x%p]\n", euid, pid); - goto out_unlock; - } - rc = ecryptfs_exorcise_daemon(daemon); -out_unlock: - mutex_unlock(&ecryptfs_daemon_hash_mux); - return rc; -} - -/** * ecryptfs_process_reponse * @msg: The ecryptfs message received; the caller should sanity check * msg->data_len and free the memory - * @pid: The process ID of the userspace application that sent the - * message * @seq: The sequence number of the message; must match the sequence * number for the existing message context waiting for this * response @@ -270,16 +220,11 @@ out_unlock: * * Returns zero on success; non-zero otherwise */ -int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, - struct user_namespace *user_ns, struct pid *pid, - u32 seq) +int ecryptfs_process_response(struct ecryptfs_daemon *daemon, + struct ecryptfs_message *msg, u32 seq) { - struct ecryptfs_daemon *uninitialized_var(daemon); struct ecryptfs_msg_ctx *msg_ctx; size_t msg_size; - struct nsproxy *nsproxy; - struct user_namespace *tsk_user_ns; - uid_t ctx_euid; int rc; if (msg->index >= ecryptfs_message_buf_len) { @@ -292,51 +237,6 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, } msg_ctx = &ecryptfs_msg_ctx_arr[msg->index]; mutex_lock(&msg_ctx->mux); - mutex_lock(&ecryptfs_daemon_hash_mux); - rcu_read_lock(); - nsproxy = task_nsproxy(msg_ctx->task); - if (nsproxy == NULL) { - rc = -EBADMSG; - printk(KERN_ERR "%s: Receiving process is a zombie. Dropping " - "message.\n", __func__); - rcu_read_unlock(); - mutex_unlock(&ecryptfs_daemon_hash_mux); - goto wake_up; - } - tsk_user_ns = __task_cred(msg_ctx->task)->user_ns; - ctx_euid = task_euid(msg_ctx->task); - rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, tsk_user_ns); - rcu_read_unlock(); - mutex_unlock(&ecryptfs_daemon_hash_mux); - if (rc) { - rc = -EBADMSG; - printk(KERN_WARNING "%s: User [%d] received a " - "message response from process [0x%p] but does " - "not have a registered daemon\n", __func__, - ctx_euid, pid); - goto wake_up; - } - if (ctx_euid != euid) { - rc = -EBADMSG; - printk(KERN_WARNING "%s: Received message from user " - "[%d]; expected message from user [%d]\n", __func__, - euid, ctx_euid); - goto unlock; - } - if (tsk_user_ns != user_ns) { - rc = -EBADMSG; - printk(KERN_WARNING "%s: Received message from user_ns " - "[0x%p]; expected message from user_ns [0x%p]\n", - __func__, user_ns, tsk_user_ns); - goto unlock; - } - if (daemon->pid != pid) { - rc = -EBADMSG; - printk(KERN_ERR "%s: User [%d] sent a message response " - "from an unrecognized process [0x%p]\n", - __func__, ctx_euid, pid); - goto unlock; - } if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) { rc = -EINVAL; printk(KERN_WARNING "%s: Desired context element is not " @@ -359,9 +259,8 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, } memcpy(msg_ctx->msg, msg, msg_size); msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE; - rc = 0; -wake_up: wake_up_process(msg_ctx->task); + rc = 0; unlock: mutex_unlock(&msg_ctx->mux); out: @@ -383,14 +282,11 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx) { struct ecryptfs_daemon *daemon; - uid_t euid = current_euid(); int rc; - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); + rc = ecryptfs_find_daemon_by_euid(&daemon); if (rc || !daemon) { rc = -ENOTCONN; - printk(KERN_ERR "%s: User [%d] does not have a daemon " - "registered\n", __func__, euid); goto out; } mutex_lock(&ecryptfs_msg_ctx_lists_mux); diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index c0038f6566d..412e6eda25f 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -33,7 +33,7 @@ static atomic_t ecryptfs_num_miscdev_opens; /** * ecryptfs_miscdev_poll - * @file: dev file (ignored) + * @file: dev file * @pt: dev poll table (ignored) * * Returns the poll mask @@ -41,20 +41,10 @@ static atomic_t ecryptfs_num_miscdev_opens; static unsigned int ecryptfs_miscdev_poll(struct file *file, poll_table *pt) { - struct ecryptfs_daemon *daemon; + struct ecryptfs_daemon *daemon = file->private_data; unsigned int mask = 0; - uid_t euid = current_euid(); - int rc; - mutex_lock(&ecryptfs_daemon_hash_mux); - /* TODO: Just use file->private_data? */ - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - if (rc || !daemon) { - mutex_unlock(&ecryptfs_daemon_hash_mux); - return -EINVAL; - } mutex_lock(&daemon->mux); - mutex_unlock(&ecryptfs_daemon_hash_mux); if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { printk(KERN_WARNING "%s: Attempt to poll on zombified " "daemon\n", __func__); @@ -79,7 +69,7 @@ out_unlock_daemon: /** * ecryptfs_miscdev_open * @inode: inode of miscdev handle (ignored) - * @file: file for miscdev handle (ignored) + * @file: file for miscdev handle * * Returns zero on success; non-zero otherwise */ @@ -87,7 +77,6 @@ static int ecryptfs_miscdev_open(struct inode *inode, struct file *file) { struct ecryptfs_daemon *daemon = NULL; - uid_t euid = current_euid(); int rc; mutex_lock(&ecryptfs_daemon_hash_mux); @@ -98,30 +87,20 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) "count; rc = [%d]\n", __func__, rc); goto out_unlock_daemon_list; } - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - if (rc || !daemon) { - rc = ecryptfs_spawn_daemon(&daemon, euid, current_user_ns(), - task_pid(current)); - if (rc) { - printk(KERN_ERR "%s: Error attempting to spawn daemon; " - "rc = [%d]\n", __func__, rc); - goto out_module_put_unlock_daemon_list; - } - } - mutex_lock(&daemon->mux); - if (daemon->pid != task_pid(current)) { + rc = ecryptfs_find_daemon_by_euid(&daemon); + if (!rc) { rc = -EINVAL; - printk(KERN_ERR "%s: pid [0x%p] has registered with euid [%d], " - "but pid [0x%p] has attempted to open the handle " - "instead\n", __func__, daemon->pid, daemon->euid, - task_pid(current)); - goto out_unlock_daemon; + goto out_unlock_daemon_list; + } + rc = ecryptfs_spawn_daemon(&daemon, file); + if (rc) { + printk(KERN_ERR "%s: Error attempting to spawn daemon; " + "rc = [%d]\n", __func__, rc); + goto out_module_put_unlock_daemon_list; } + mutex_lock(&daemon->mux); if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) { rc = -EBUSY; - printk(KERN_ERR "%s: Miscellaneous device handle may only be " - "opened once per daemon; pid [0x%p] already has this " - "handle open\n", __func__, daemon->pid); goto out_unlock_daemon; } daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN; @@ -140,7 +119,7 @@ out_unlock_daemon_list: /** * ecryptfs_miscdev_release * @inode: inode of fs/ecryptfs/euid handle (ignored) - * @file: file for fs/ecryptfs/euid handle (ignored) + * @file: file for fs/ecryptfs/euid handle * * This keeps the daemon registered until the daemon sends another * ioctl to fs/ecryptfs/ctl or until the kernel module unregisters. @@ -150,20 +129,18 @@ out_unlock_daemon_list: static int ecryptfs_miscdev_release(struct inode *inode, struct file *file) { - struct ecryptfs_daemon *daemon = NULL; - uid_t euid = current_euid(); + struct ecryptfs_daemon *daemon = file->private_data; int rc; - mutex_lock(&ecryptfs_daemon_hash_mux); - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - if (rc || !daemon) - daemon = file->private_data; mutex_lock(&daemon->mux); BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; atomic_dec(&ecryptfs_num_miscdev_opens); mutex_unlock(&daemon->mux); + + mutex_lock(&ecryptfs_daemon_hash_mux); rc = ecryptfs_exorcise_daemon(daemon); + mutex_unlock(&ecryptfs_daemon_hash_mux); if (rc) { printk(KERN_CRIT "%s: Fatal error whilst attempting to " "shut down daemon; rc = [%d]. Please report this " @@ -171,7 +148,6 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) BUG(); } module_put(THIS_MODULE); - mutex_unlock(&ecryptfs_daemon_hash_mux); return rc; } @@ -248,7 +224,7 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, /** * ecryptfs_miscdev_read - format and send message from queue - * @file: fs/ecryptfs/euid miscdevfs handle (ignored) + * @file: miscdevfs handle * @buf: User buffer into which to copy the next message on the daemon queue * @count: Amount of space available in @buf * @ppos: Offset in file (ignored) @@ -262,43 +238,27 @@ static ssize_t ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct ecryptfs_daemon *daemon; + struct ecryptfs_daemon *daemon = file->private_data; struct ecryptfs_msg_ctx *msg_ctx; size_t packet_length_size; char packet_length[ECRYPTFS_MAX_PKT_LEN_SIZE]; size_t i; size_t total_length; - uid_t euid = current_euid(); int rc; - mutex_lock(&ecryptfs_daemon_hash_mux); - /* TODO: Just use file->private_data? */ - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - if (rc || !daemon) { - mutex_unlock(&ecryptfs_daemon_hash_mux); - return -EINVAL; - } mutex_lock(&daemon->mux); - if (task_pid(current) != daemon->pid) { - mutex_unlock(&daemon->mux); - mutex_unlock(&ecryptfs_daemon_hash_mux); - return -EPERM; - } if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { rc = 0; - mutex_unlock(&ecryptfs_daemon_hash_mux); printk(KERN_WARNING "%s: Attempt to read from zombified " "daemon\n", __func__); goto out_unlock_daemon; } if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) { rc = 0; - mutex_unlock(&ecryptfs_daemon_hash_mux); goto out_unlock_daemon; } /* This daemon will not go away so long as this flag is set */ daemon->flags |= ECRYPTFS_DAEMON_IN_READ; - mutex_unlock(&ecryptfs_daemon_hash_mux); check_list: if (list_empty(&daemon->msg_ctx_out_queue)) { mutex_unlock(&daemon->mux); @@ -382,16 +342,12 @@ out_unlock_daemon: * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon * @data: Bytes comprising struct ecryptfs_message * @data_size: sizeof(struct ecryptfs_message) + data len - * @euid: Effective user id of miscdevess sending the miscdev response - * @user_ns: The namespace in which @euid applies - * @pid: Miscdevess id of miscdevess sending the miscdev response * @seq: Sequence number for miscdev response packet * * Returns zero on success; non-zero otherwise */ -static int ecryptfs_miscdev_response(char *data, size_t data_size, - uid_t euid, struct user_namespace *user_ns, - struct pid *pid, u32 seq) +static int ecryptfs_miscdev_response(struct ecryptfs_daemon *daemon, char *data, + size_t data_size, u32 seq) { struct ecryptfs_message *msg = (struct ecryptfs_message *)data; int rc; @@ -403,7 +359,7 @@ static int ecryptfs_miscdev_response(char *data, size_t data_size, rc = -EINVAL; goto out; } - rc = ecryptfs_process_response(msg, euid, user_ns, pid, seq); + rc = ecryptfs_process_response(daemon, msg, seq); if (rc) printk(KERN_ERR "Error processing response message; rc = [%d]\n", rc); @@ -413,7 +369,7 @@ out: /** * ecryptfs_miscdev_write - handle write to daemon miscdev handle - * @file: File for misc dev handle (ignored) + * @file: File for misc dev handle * @buf: Buffer containing user data * @count: Amount of data in @buf * @ppos: Pointer to offset in file (ignored) @@ -428,7 +384,6 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, u32 seq; size_t packet_size, packet_size_length; char *data; - uid_t euid = current_euid(); unsigned char packet_size_peek[ECRYPTFS_MAX_PKT_LEN_SIZE]; ssize_t rc; @@ -488,10 +443,9 @@ memdup: } memcpy(&counter_nbo, &data[PKT_CTR_OFFSET], PKT_CTR_SIZE); seq = be32_to_cpu(counter_nbo); - rc = ecryptfs_miscdev_response( + rc = ecryptfs_miscdev_response(file->private_data, &data[PKT_LEN_OFFSET + packet_size_length], - packet_size, euid, current_user_ns(), - task_pid(current), seq); + packet_size, seq); if (rc) { printk(KERN_WARNING "%s: Failed to deliver miscdev " "response to requesting operation; rc = [%zd]\n", diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index a46b3a8fee1..bd1d57f98f7 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -66,18 +66,6 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) { int rc; - /* - * Refuse to write the page out if we are called from reclaim context - * since our writepage() path may potentially allocate memory when - * calling into the lower fs vfs_write() which may in turn invoke - * us again. - */ - if (current->flags & PF_MEMALLOC) { - redirty_page_for_writepage(wbc, page); - rc = 0; - goto out; - } - rc = ecryptfs_encrypt_page(page); if (rc) { ecryptfs_printk(KERN_WARNING, "Error encrypting " @@ -498,7 +486,6 @@ static int ecryptfs_write_end(struct file *file, struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; int rc; - int need_unlock_page = 1; ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" "(page w/ index = [0x%.16lx], to = [%d])\n", index, to); @@ -519,26 +506,26 @@ static int ecryptfs_write_end(struct file *file, "zeros in page with index = [0x%.16lx]\n", index); goto out; } - set_page_dirty(page); - unlock_page(page); - need_unlock_page = 0; + rc = ecryptfs_encrypt_page(page); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " + "index [0x%.16lx])\n", index); + goto out; + } if (pos + copied > i_size_read(ecryptfs_inode)) { i_size_write(ecryptfs_inode, pos + copied); ecryptfs_printk(KERN_DEBUG, "Expanded file size to " "[0x%.16llx]\n", (unsigned long long)i_size_read(ecryptfs_inode)); - balance_dirty_pages_ratelimited(mapping); - rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); - if (rc) { - printk(KERN_ERR "Error writing inode size to metadata; " - "rc = [%d]\n", rc); - goto out; - } } - rc = copied; + rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); + if (rc) + printk(KERN_ERR "Error writing inode size to metadata; " + "rc = [%d]\n", rc); + else + rc = copied; out: - if (need_unlock_page) - unlock_page(page); + unlock_page(page); page_cache_release(page); return rc; } diff --git a/fs/exec.c b/fs/exec.c index 3684353ebd5..574cf4de4ec 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2069,25 +2069,18 @@ static void wait_for_dump_helpers(struct file *file) */ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) { - struct file *rp, *wp; + struct file *files[2]; struct fdtable *fdt; struct coredump_params *cp = (struct coredump_params *)info->data; struct files_struct *cf = current->files; + int err = create_pipe_files(files, 0); + if (err) + return err; - wp = create_write_pipe(0); - if (IS_ERR(wp)) - return PTR_ERR(wp); - - rp = create_read_pipe(wp, 0); - if (IS_ERR(rp)) { - free_write_pipe(wp); - return PTR_ERR(rp); - } - - cp->file = wp; + cp->file = files[1]; sys_close(0); - fd_install(0, rp); + fd_install(0, files[0]); spin_lock(&cf->file_lock); fdt = files_fdtable(cf); __set_open_fd(0, fdt); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 264d315f6c4..6363ac66faf 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -79,6 +79,7 @@ void ext2_evict_inode(struct inode * inode) truncate_inode_pages(&inode->i_data, 0); if (want_delete) { + sb_start_intwrite(inode->i_sb); /* set dtime */ EXT2_I(inode)->i_dtime = get_seconds(); mark_inode_dirty(inode); @@ -98,8 +99,10 @@ void ext2_evict_inode(struct inode * inode) if (unlikely(rsv)) kfree(rsv); - if (want_delete) + if (want_delete) { ext2_free_inode(inode); + sb_end_intwrite(inode->i_sb); + } } typedef struct { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 9f311d27b16..af74d9e27b7 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -42,6 +42,8 @@ static void ext2_sync_super(struct super_block *sb, static int ext2_remount (struct super_block * sb, int * flags, char * data); static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); static int ext2_sync_fs(struct super_block *sb, int wait); +static int ext2_freeze(struct super_block *sb); +static int ext2_unfreeze(struct super_block *sb); void ext2_error(struct super_block *sb, const char *function, const char *fmt, ...) @@ -305,6 +307,8 @@ static const struct super_operations ext2_sops = { .evict_inode = ext2_evict_inode, .put_super = ext2_put_super, .sync_fs = ext2_sync_fs, + .freeze_fs = ext2_freeze, + .unfreeze_fs = ext2_unfreeze, .statfs = ext2_statfs, .remount_fs = ext2_remount, .show_options = ext2_show_options, @@ -1200,6 +1204,35 @@ static int ext2_sync_fs(struct super_block *sb, int wait) return 0; } +static int ext2_freeze(struct super_block *sb) +{ + struct ext2_sb_info *sbi = EXT2_SB(sb); + + /* + * Open but unlinked files present? Keep EXT2_VALID_FS flag cleared + * because we have unattached inodes and thus filesystem is not fully + * consistent. + */ + if (atomic_long_read(&sb->s_remove_count)) { + ext2_sync_fs(sb, 1); + return 0; + } + /* Set EXT2_FS_VALID flag */ + spin_lock(&sbi->s_lock); + sbi->s_es->s_state = cpu_to_le16(sbi->s_mount_state); + spin_unlock(&sbi->s_lock); + ext2_sync_super(sb, sbi->s_es, 1); + + return 0; +} + +static int ext2_unfreeze(struct super_block *sb) +{ + /* Just write sb to clear EXT2_VALID_FS flag */ + ext2_write_super(sb); + + return 0; +} void ext2_write_super(struct super_block *sb) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 89b59cb7f9b..6324f74e034 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -233,6 +233,11 @@ void ext4_evict_inode(struct inode *inode) if (is_bad_inode(inode)) goto no_delete; + /* + * Protect us against freezing - iput() caller didn't have to have any + * protection against it + */ + sb_start_intwrite(inode->i_sb); handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3); if (IS_ERR(handle)) { ext4_std_error(inode->i_sb, PTR_ERR(handle)); @@ -242,6 +247,7 @@ void ext4_evict_inode(struct inode *inode) * cleaned up. */ ext4_orphan_del(NULL, inode); + sb_end_intwrite(inode->i_sb); goto no_delete; } @@ -273,6 +279,7 @@ void ext4_evict_inode(struct inode *inode) stop_handle: ext4_journal_stop(handle); ext4_orphan_del(NULL, inode); + sb_end_intwrite(inode->i_sb); goto no_delete; } } @@ -301,6 +308,7 @@ void ext4_evict_inode(struct inode *inode) else ext4_free_inode(handle, inode); ext4_journal_stop(handle); + sb_end_intwrite(inode->i_sb); return; no_delete: ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ @@ -4779,11 +4787,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) get_block_t *get_block; int retries = 0; - /* - * This check is racy but catches the common case. We rely on - * __block_page_mkwrite() to do a reliable check. - */ - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + sb_start_pagefault(inode->i_sb); /* Delalloc case is easy... */ if (test_opt(inode->i_sb, DELALLOC) && !ext4_should_journal_data(inode) && @@ -4851,5 +4855,6 @@ retry_alloc: out_ret: ret = block_page_mkwrite_return(ret); out: + sb_end_pagefault(inode->i_sb); return ret; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index f99a1311e84..fe7c63f4717 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -44,6 +44,11 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) { struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); + /* + * We protect against freezing so that we don't create dirty buffers + * on frozen filesystem. + */ + sb_start_write(sb); ext4_mmp_csum_set(sb, mmp); mark_buffer_dirty(bh); lock_buffer(bh); @@ -51,6 +56,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) get_bh(bh); submit_bh(WRITE_SYNC, bh); wait_on_buffer(bh); + sb_end_write(sb); if (unlikely(!buffer_uptodate(bh))) return 1; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2d51cd9af22..d76ec8277d3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -331,33 +331,17 @@ static void ext4_put_nojournal(handle_t *handle) * journal_end calls result in the superblock being marked dirty, so * that sync() will call the filesystem's write_super callback if * appropriate. - * - * To avoid j_barrier hold in userspace when a user calls freeze(), - * ext4 prevents a new handle from being started by s_frozen, which - * is in an upper layer. */ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) { journal_t *journal; - handle_t *handle; trace_ext4_journal_start(sb, nblocks, _RET_IP_); if (sb->s_flags & MS_RDONLY) return ERR_PTR(-EROFS); + WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); journal = EXT4_SB(sb)->s_journal; - handle = ext4_journal_current_handle(); - - /* - * If a handle has been started, it should be allowed to - * finish, otherwise deadlock could happen between freeze - * and others(e.g. truncate) due to the restart of the - * journal handle if the filesystem is forzen and active - * handles are not stopped. - */ - if (!handle) - vfs_check_frozen(sb, SB_FREEZE_TRANS); - if (!journal) return ext4_get_nojournal(); /* @@ -2747,6 +2731,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr) sb = elr->lr_super; ngroups = EXT4_SB(sb)->s_groups_count; + sb_start_write(sb); for (group = elr->lr_next_group; group < ngroups; group++) { gdp = ext4_get_group_desc(sb, group, NULL); if (!gdp) { @@ -2773,6 +2758,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr) elr->lr_next_sched = jiffies + elr->lr_timeout; elr->lr_next_group = group + 1; } + sb_end_write(sb); return ret; } @@ -4460,10 +4446,8 @@ int ext4_force_commit(struct super_block *sb) return 0; journal = EXT4_SB(sb)->s_journal; - if (journal) { - vfs_check_frozen(sb, SB_FREEZE_TRANS); + if (journal) ret = ext4_journal_force_commit(journal); - } return ret; } @@ -4493,9 +4477,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait) * gives us a chance to flush the journal completely and mark the fs clean. * * Note that only this function cannot bring a filesystem to be in a clean - * state independently, because ext4 prevents a new handle from being started - * by @sb->s_frozen, which stays in an upper layer. It thus needs help from - * the upper layer. + * state independently. It relies on upper layer to stop all data & metadata + * modifications. */ static int ext4_freeze(struct super_block *sb) { @@ -4522,7 +4505,7 @@ static int ext4_freeze(struct super_block *sb) EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); error = ext4_commit_super(sb, 1); out: - /* we rely on s_frozen to stop further updates */ + /* we rely on upper layer to stop further updates */ jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); return error; } diff --git a/fs/fat/file.c b/fs/fat/file.c index a71fe3715ee..e007b8bd8e5 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -43,10 +43,10 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) if (err) goto out; - mutex_lock(&inode->i_mutex); err = mnt_want_write_file(file); if (err) - goto out_unlock_inode; + goto out; + mutex_lock(&inode->i_mutex); /* * ATTR_VOLUME and ATTR_DIR cannot be changed; this also @@ -73,14 +73,14 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) /* The root directory has no attributes */ if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { err = -EINVAL; - goto out_drop_write; + goto out_unlock_inode; } if (sbi->options.sys_immutable && ((attr | oldattr) & ATTR_SYS) && !capable(CAP_LINUX_IMMUTABLE)) { err = -EPERM; - goto out_drop_write; + goto out_unlock_inode; } /* @@ -90,12 +90,12 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) */ err = security_inode_setattr(file->f_path.dentry, &ia); if (err) - goto out_drop_write; + goto out_unlock_inode; /* This MUST be done before doing anything irreversible... */ err = fat_setattr(file->f_path.dentry, &ia); if (err) - goto out_drop_write; + goto out_unlock_inode; fsnotify_change(file->f_path.dentry, ia.ia_valid); if (sbi->options.sys_immutable) { @@ -107,10 +107,9 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) fat_save_attrs(inode, attr); mark_inode_dirty(inode); -out_drop_write: - mnt_drop_write_file(file); out_unlock_inode: mutex_unlock(&inode->i_mutex); + mnt_drop_write_file(file); out: return err; } diff --git a/fs/file_table.c b/fs/file_table.c index b3fc4d67a26..701985e4ccd 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -43,7 +43,7 @@ static struct kmem_cache *filp_cachep __read_mostly; static struct percpu_counter nr_files __cacheline_aligned_in_smp; -static inline void file_free_rcu(struct rcu_head *head) +static void file_free_rcu(struct rcu_head *head) { struct file *f = container_of(head, struct file, f_u.fu_rcuhead); @@ -217,7 +217,7 @@ static void drop_file_write_access(struct file *file) return; if (file_check_writeable(file) != 0) return; - mnt_drop_write(mnt); + __mnt_drop_write(mnt); file_release_write(file); } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 50d0b78130a..be3efc4f64f 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -52,11 +52,6 @@ struct wb_writeback_work { struct completion *done; /* set if the caller waits */ }; -/* - * We don't actually have pdflush, but this one is exported though /proc... - */ -int nr_pdflush_threads; - /** * writeback_in_progress - determine whether there is writeback in progress * @bdi: the device's backing_dev_info structure. diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b321a688cde..93d8d6c9494 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -944,9 +944,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return err; count = ocount; - + sb_start_write(inode->i_sb); mutex_lock(&inode->i_mutex); - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; @@ -1004,6 +1003,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, out: current->backing_dev_info = NULL; mutex_unlock(&inode->i_mutex); + sb_end_write(inode->i_sb); return written ? written : err; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 9aa6af13823..d1d791ef38d 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -373,11 +373,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) loff_t size; int ret; - /* Wait if fs is frozen. This is racy so we check again later on - * and retry if the fs has been frozen after the page lock has - * been acquired - */ - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + sb_start_pagefault(inode->i_sb); + + /* Update file times before taking page lock */ + file_update_time(vma->vm_file); ret = gfs2_rs_alloc(ip); if (ret) @@ -462,14 +461,9 @@ out: gfs2_holder_uninit(&gh); if (ret == 0) { set_page_dirty(page); - /* This check must be post dropping of transaction lock */ - if (inode->i_sb->s_frozen == SB_UNFROZEN) { - wait_on_page_writeback(page); - } else { - ret = -EAGAIN; - unlock_page(page); - } + wait_on_page_writeback(page); } + sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); } diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index ad3e2fb763d..adbd27875ef 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -50,6 +50,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, if (revokes) tr->tr_reserved += gfs2_struct2blk(sdp, revokes, sizeof(u64)); + sb_start_intwrite(sdp->sd_vfs); gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); error = gfs2_glock_nq(&tr->tr_t_gh); @@ -68,6 +69,7 @@ fail_gunlock: gfs2_glock_dq(&tr->tr_t_gh); fail_holder_uninit: + sb_end_intwrite(sdp->sd_vfs); gfs2_holder_uninit(&tr->tr_t_gh); kfree(tr); @@ -116,6 +118,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) gfs2_holder_uninit(&tr->tr_t_gh); kfree(tr); } + sb_end_intwrite(sdp->sd_vfs); return; } @@ -136,6 +139,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) gfs2_log_flush(sdp, NULL); + sb_end_intwrite(sdp->sd_vfs); } /** diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e13e9bdb0bf..8349a899912 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -416,8 +416,8 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) else v_offset = 0; - __unmap_hugepage_range(vma, - vma->vm_start + v_offset, vma->vm_end, NULL); + unmap_hugepage_range(vma, vma->vm_start + v_offset, + vma->vm_end, NULL); } } diff --git a/fs/inode.c b/fs/inode.c index 3cc50432046..ac8d904b3f1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1542,9 +1542,11 @@ void touch_atime(struct path *path) if (timespec_equal(&inode->i_atime, &now)) return; - if (mnt_want_write(mnt)) + if (!sb_start_write_trylock(inode->i_sb)) return; + if (__mnt_want_write(mnt)) + goto skip_update; /* * File systems can error out when updating inodes if they need to * allocate new space to modify an inode (such is the case for @@ -1555,7 +1557,9 @@ void touch_atime(struct path *path) * of the fs read only, e.g. subvolumes in Btrfs. */ update_time(inode, &now, S_ATIME); - mnt_drop_write(mnt); + __mnt_drop_write(mnt); +skip_update: + sb_end_write(inode->i_sb); } EXPORT_SYMBOL(touch_atime); @@ -1662,11 +1666,11 @@ int file_update_time(struct file *file) return 0; /* Finally allowed to write? Takes lock. */ - if (mnt_want_write_file(file)) + if (__mnt_want_write_file(file)) return 0; ret = update_time(inode, &now, sync_it); - mnt_drop_write_file(file); + __mnt_drop_write_file(file); return ret; } diff --git a/fs/internal.h b/fs/internal.h index a6fd56c68b1..371bcc4b169 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -61,6 +61,10 @@ extern void __init mnt_init(void); extern struct lglock vfsmount_lock; +extern int __mnt_want_write(struct vfsmount *); +extern int __mnt_want_write_file(struct file *); +extern void __mnt_drop_write(struct vfsmount *); +extern void __mnt_drop_write_file(struct file *); /* * fs_struct.c diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 8392cb85bd5..05d29124c6a 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -156,12 +156,16 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) struct nlm_rqst *call; int status; - nlm_get_host(host); call = nlm_alloc_call(host); if (call == NULL) return -ENOMEM; nlmclnt_locks_init_private(fl, host); + if (!fl->fl_u.nfs_fl.owner) { + /* lockowner allocation has failed */ + nlmclnt_release_call(call); + return -ENOMEM; + } /* Set up the argument struct */ nlmclnt_setlockargs(call, fl); @@ -185,9 +189,6 @@ EXPORT_SYMBOL_GPL(nlmclnt_proc); /* * Allocate an NLM RPC call struct - * - * Note: the caller must hold a reference to host. In case of failure, - * this reference will be released. */ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) { @@ -199,7 +200,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) atomic_set(&call->a_count, 1); locks_init_lock(&call->a_args.lock.fl); locks_init_lock(&call->a_res.lock.fl); - call->a_host = host; + call->a_host = nlm_get_host(host); return call; } if (signalled()) @@ -207,7 +208,6 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) printk("nlm_alloc_call: failed, waiting for memory\n"); schedule_timeout_interruptible(5*HZ); } - nlmclnt_release_host(host); return NULL; } @@ -750,7 +750,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl dprintk("lockd: blocking lock attempt was interrupted by a signal.\n" " Attempting to cancel lock.\n"); - req = nlm_alloc_call(nlm_get_host(host)); + req = nlm_alloc_call(host); if (!req) return -ENOMEM; req->a_flags = RPC_TASK_ASYNC; diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4a43d253c04..b147d1ae71f 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -257,6 +257,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args return rpc_system_err; call = nlm_alloc_call(host); + nlmsvc_release_host(host); if (call == NULL) return rpc_system_err; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index afe4488c33d..fb1a2bedbe9 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -219,7 +219,6 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host, struct nlm_block *block; struct nlm_rqst *call = NULL; - nlm_get_host(host); call = nlm_alloc_call(host); if (call == NULL) return NULL; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index de8f2caa223..3009a365e08 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -297,6 +297,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args return rpc_system_err; call = nlm_alloc_call(host); + nlmsvc_release_host(host); if (call == NULL) return rpc_system_err; diff --git a/fs/locks.c b/fs/locks.c index cdcf219a739..7e81bfc7516 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -200,11 +200,7 @@ void locks_release_private(struct file_lock *fl) fl->fl_ops->fl_release_private(fl); fl->fl_ops = NULL; } - if (fl->fl_lmops) { - if (fl->fl_lmops->lm_release_private) - fl->fl_lmops->lm_release_private(fl); - fl->fl_lmops = NULL; - } + fl->fl_lmops = NULL; } EXPORT_SYMBOL_GPL(locks_release_private); diff --git a/fs/namei.c b/fs/namei.c index 2ccc35c4dc2..1b464390dde 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -650,6 +650,121 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki path_put(link); } +int sysctl_protected_symlinks __read_mostly = 1; +int sysctl_protected_hardlinks __read_mostly = 1; + +/** + * may_follow_link - Check symlink following for unsafe situations + * @link: The path of the symlink + * + * In the case of the sysctl_protected_symlinks sysctl being enabled, + * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is + * in a sticky world-writable directory. This is to protect privileged + * processes from failing races against path names that may change out + * from under them by way of other users creating malicious symlinks. + * It will permit symlinks to be followed only when outside a sticky + * world-writable directory, or when the uid of the symlink and follower + * match, or when the directory owner matches the symlink's owner. + * + * Returns 0 if following the symlink is allowed, -ve on error. + */ +static inline int may_follow_link(struct path *link, struct nameidata *nd) +{ + const struct inode *inode; + const struct inode *parent; + + if (!sysctl_protected_symlinks) + return 0; + + /* Allowed if owner and follower match. */ + inode = link->dentry->d_inode; + if (current_cred()->fsuid == inode->i_uid) + return 0; + + /* Allowed if parent directory not sticky and world-writable. */ + parent = nd->path.dentry->d_inode; + if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH)) + return 0; + + /* Allowed if parent directory and link owner match. */ + if (parent->i_uid == inode->i_uid) + return 0; + + path_put_conditional(link, nd); + path_put(&nd->path); + audit_log_link_denied("follow_link", link); + return -EACCES; +} + +/** + * safe_hardlink_source - Check for safe hardlink conditions + * @inode: the source inode to hardlink from + * + * Return false if at least one of the following conditions: + * - inode is not a regular file + * - inode is setuid + * - inode is setgid and group-exec + * - access failure for read and write + * + * Otherwise returns true. + */ +static bool safe_hardlink_source(struct inode *inode) +{ + umode_t mode = inode->i_mode; + + /* Special files should not get pinned to the filesystem. */ + if (!S_ISREG(mode)) + return false; + + /* Setuid files should not get pinned to the filesystem. */ + if (mode & S_ISUID) + return false; + + /* Executable setgid files should not get pinned to the filesystem. */ + if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) + return false; + + /* Hardlinking to unreadable or unwritable sources is dangerous. */ + if (inode_permission(inode, MAY_READ | MAY_WRITE)) + return false; + + return true; +} + +/** + * may_linkat - Check permissions for creating a hardlink + * @link: the source to hardlink from + * + * Block hardlink when all of: + * - sysctl_protected_hardlinks enabled + * - fsuid does not match inode + * - hardlink source is unsafe (see safe_hardlink_source() above) + * - not CAP_FOWNER + * + * Returns 0 if successful, -ve on error. + */ +static int may_linkat(struct path *link) +{ + const struct cred *cred; + struct inode *inode; + + if (!sysctl_protected_hardlinks) + return 0; + + cred = current_cred(); + inode = link->dentry->d_inode; + + /* Source inode owner (or CAP_FOWNER) can hardlink all they like, + * otherwise, it must be a safe source. + */ + if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) || + capable(CAP_FOWNER)) + return 0; + + audit_log_link_denied("linkat", link); + return -EPERM; +} + static __always_inline int follow_link(struct path *link, struct nameidata *nd, void **p) { @@ -1818,6 +1933,9 @@ static int path_lookupat(int dfd, const char *name, while (err > 0) { void *cookie; struct path link = path; + err = may_follow_link(&link, nd); + if (unlikely(err)) + break; nd->flags |= LOOKUP_PARENT; err = follow_link(&link, nd, &cookie); if (err) @@ -2277,7 +2395,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) static int atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct file *file, const struct open_flags *op, - bool *want_write, bool need_lookup, + bool got_write, bool need_lookup, int *opened) { struct inode *dir = nd->path.dentry->d_inode; @@ -2300,7 +2418,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) mode &= ~current_umask(); - if (open_flag & O_EXCL) { + if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) { open_flag &= ~O_TRUNC; *opened |= FILE_CREATED; } @@ -2314,12 +2432,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, * Another problem is returing the "right" error value (e.g. for an * O_EXCL open we want to return EEXIST not EROFS). */ - if ((open_flag & (O_CREAT | O_TRUNC)) || - (open_flag & O_ACCMODE) != O_RDONLY) { - error = mnt_want_write(nd->path.mnt); - if (!error) { - *want_write = true; - } else if (!(open_flag & O_CREAT)) { + if (((open_flag & (O_CREAT | O_TRUNC)) || + (open_flag & O_ACCMODE) != O_RDONLY) && unlikely(!got_write)) { + if (!(open_flag & O_CREAT)) { /* * No O_CREATE -> atomicity not a requirement -> fall * back to lookup + open @@ -2327,11 +2442,11 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, goto no_open; } else if (open_flag & (O_EXCL | O_TRUNC)) { /* Fall back and fail with the right error */ - create_error = error; + create_error = -EROFS; goto no_open; } else { /* No side effects, safe to clear O_CREAT */ - create_error = error; + create_error = -EROFS; open_flag &= ~O_CREAT; } } @@ -2438,7 +2553,7 @@ looked_up: static int lookup_open(struct nameidata *nd, struct path *path, struct file *file, const struct open_flags *op, - bool *want_write, int *opened) + bool got_write, int *opened) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -2456,7 +2571,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, goto out_no_open; if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { - return atomic_open(nd, dentry, path, file, op, want_write, + return atomic_open(nd, dentry, path, file, op, got_write, need_lookup, opened); } @@ -2480,10 +2595,10 @@ static int lookup_open(struct nameidata *nd, struct path *path, * a permanent write count is taken through * the 'struct file' in finish_open(). */ - error = mnt_want_write(nd->path.mnt); - if (error) + if (!got_write) { + error = -EROFS; goto out_dput; - *want_write = true; + } *opened |= FILE_CREATED; error = security_path_mknod(&nd->path, dentry, mode, 0); if (error) @@ -2513,7 +2628,7 @@ static int do_last(struct nameidata *nd, struct path *path, struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool will_truncate = (open_flag & O_TRUNC) != 0; - bool want_write = false; + bool got_write = false; int acc_mode = op->acc_mode; struct inode *inode; bool symlink_ok = false; @@ -2582,8 +2697,18 @@ static int do_last(struct nameidata *nd, struct path *path, } retry_lookup: + if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { + error = mnt_want_write(nd->path.mnt); + if (!error) + got_write = true; + /* + * do _not_ fail yet - we might not need that or fail with + * a different error; let lookup_open() decide; we'll be + * dropping this one anyway. + */ + } mutex_lock(&dir->d_inode->i_mutex); - error = lookup_open(nd, path, file, op, &want_write, opened); + error = lookup_open(nd, path, file, op, got_write, opened); mutex_unlock(&dir->d_inode->i_mutex); if (error <= 0) { @@ -2608,22 +2733,23 @@ retry_lookup: } /* - * It already exists. + * create/update audit record if it already exists. */ - audit_inode(pathname, path->dentry); + if (path->dentry->d_inode) + audit_inode(pathname, path->dentry); /* * If atomic_open() acquired write access it is dropped now due to * possible mount and symlink following (this might be optimized away if * necessary...) */ - if (want_write) { + if (got_write) { mnt_drop_write(nd->path.mnt); - want_write = false; + got_write = false; } error = -EEXIST; - if (open_flag & O_EXCL) + if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) goto exit_dput; error = follow_managed(path, nd->flags); @@ -2684,7 +2810,7 @@ finish_open: error = mnt_want_write(nd->path.mnt); if (error) goto out; - want_write = true; + got_write = true; } finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); @@ -2711,7 +2837,7 @@ opened: goto exit_fput; } out: - if (want_write) + if (got_write) mnt_drop_write(nd->path.mnt); path_put(&save_parent); terminate_walk(nd); @@ -2735,9 +2861,9 @@ stale_open: nd->inode = dir->d_inode; save_parent.mnt = NULL; save_parent.dentry = NULL; - if (want_write) { + if (got_write) { mnt_drop_write(nd->path.mnt); - want_write = false; + got_write = false; } retried = true; goto retry_lookup; @@ -2777,6 +2903,9 @@ static struct file *path_openat(int dfd, const char *pathname, error = -ELOOP; break; } + error = may_follow_link(&link, nd); + if (unlikely(error)) + break; nd->flags |= LOOKUP_PARENT; nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); error = follow_link(&link, nd, &cookie); @@ -2846,6 +2975,7 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path { struct dentry *dentry = ERR_PTR(-EEXIST); struct nameidata nd; + int err2; int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); if (error) return ERR_PTR(error); @@ -2859,16 +2989,19 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path nd.flags &= ~LOOKUP_PARENT; nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL; + /* don't fail immediately if it's r/o, at least try to report other errors */ + err2 = mnt_want_write(nd.path.mnt); /* * Do the final lookup. */ mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); if (IS_ERR(dentry)) - goto fail; + goto unlock; + error = -EEXIST; if (dentry->d_inode) - goto eexist; + goto fail; /* * Special case - lookup gave negative, but... we had foo/bar/ * From the vfs_mknod() POV we just have a negative dentry - @@ -2876,23 +3009,37 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path * been asking for (non-existent) directory. -ENOENT for you. */ if (unlikely(!is_dir && nd.last.name[nd.last.len])) { - dput(dentry); - dentry = ERR_PTR(-ENOENT); + error = -ENOENT; + goto fail; + } + if (unlikely(err2)) { + error = err2; goto fail; } *path = nd.path; return dentry; -eexist: - dput(dentry); - dentry = ERR_PTR(-EEXIST); fail: + dput(dentry); + dentry = ERR_PTR(error); +unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + if (!err2) + mnt_drop_write(nd.path.mnt); out: path_put(&nd.path); return dentry; } EXPORT_SYMBOL(kern_path_create); +void done_path_create(struct path *path, struct dentry *dentry) +{ + dput(dentry); + mutex_unlock(&path->dentry->d_inode->i_mutex); + mnt_drop_write(path->mnt); + path_put(path); +} +EXPORT_SYMBOL(done_path_create); + struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) { char *tmp = getname(pathname); @@ -2956,8 +3103,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, struct path path; int error; - if (S_ISDIR(mode)) - return -EPERM; + error = may_mknod(mode); + if (error) + return error; dentry = user_path_create(dfd, filename, &path, 0); if (IS_ERR(dentry)) @@ -2965,15 +3113,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, if (!IS_POSIXACL(path.dentry->d_inode)) mode &= ~current_umask(); - error = may_mknod(mode); - if (error) - goto out_dput; - error = mnt_want_write(path.mnt); - if (error) - goto out_dput; error = security_path_mknod(&path, dentry, mode, dev); if (error) - goto out_drop_write; + goto out; switch (mode & S_IFMT) { case 0: case S_IFREG: error = vfs_create(path.dentry->d_inode,dentry,mode,true); @@ -2986,13 +3128,8 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, error = vfs_mknod(path.dentry->d_inode,dentry,mode,0); break; } -out_drop_write: - mnt_drop_write(path.mnt); -out_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); - +out: + done_path_create(&path, dentry); return error; } @@ -3038,19 +3175,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) if (!IS_POSIXACL(path.dentry->d_inode)) mode &= ~current_umask(); - error = mnt_want_write(path.mnt); - if (error) - goto out_dput; error = security_path_mkdir(&path, dentry, mode); - if (error) - goto out_drop_write; - error = vfs_mkdir(path.dentry->d_inode, dentry, mode); -out_drop_write: - mnt_drop_write(path.mnt); -out_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); + if (!error) + error = vfs_mkdir(path.dentry->d_inode, dentry, mode); + done_path_create(&path, dentry); return error; } @@ -3144,6 +3272,9 @@ static long do_rmdir(int dfd, const char __user *pathname) } nd.flags &= ~LOOKUP_PARENT; + error = mnt_want_write(nd.path.mnt); + if (error) + goto exit1; mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); @@ -3154,19 +3285,15 @@ static long do_rmdir(int dfd, const char __user *pathname) error = -ENOENT; goto exit3; } - error = mnt_want_write(nd.path.mnt); - if (error) - goto exit3; error = security_path_rmdir(&nd.path, dentry); if (error) - goto exit4; + goto exit3; error = vfs_rmdir(nd.path.dentry->d_inode, dentry); -exit4: - mnt_drop_write(nd.path.mnt); exit3: dput(dentry); exit2: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + mnt_drop_write(nd.path.mnt); exit1: path_put(&nd.path); putname(name); @@ -3233,6 +3360,9 @@ static long do_unlinkat(int dfd, const char __user *pathname) goto exit1; nd.flags &= ~LOOKUP_PARENT; + error = mnt_want_write(nd.path.mnt); + if (error) + goto exit1; mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); @@ -3245,21 +3375,17 @@ static long do_unlinkat(int dfd, const char __user *pathname) if (!inode) goto slashes; ihold(inode); - error = mnt_want_write(nd.path.mnt); - if (error) - goto exit2; error = security_path_unlink(&nd.path, dentry); if (error) - goto exit3; + goto exit2; error = vfs_unlink(nd.path.dentry->d_inode, dentry); -exit3: - mnt_drop_write(nd.path.mnt); - exit2: +exit2: dput(dentry); } mutex_unlock(&nd.path.dentry->d_inode->i_mutex); if (inode) iput(inode); /* truncate the inode here */ + mnt_drop_write(nd.path.mnt); exit1: path_put(&nd.path); putname(name); @@ -3324,19 +3450,10 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, if (IS_ERR(dentry)) goto out_putname; - error = mnt_want_write(path.mnt); - if (error) - goto out_dput; error = security_path_symlink(&path, dentry, from); - if (error) - goto out_drop_write; - error = vfs_symlink(path.dentry->d_inode, dentry, from); -out_drop_write: - mnt_drop_write(path.mnt); -out_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); + if (!error) + error = vfs_symlink(path.dentry->d_inode, dentry, from); + done_path_create(&path, dentry); out_putname: putname(from); return error; @@ -3436,19 +3553,15 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, error = -EXDEV; if (old_path.mnt != new_path.mnt) goto out_dput; - error = mnt_want_write(new_path.mnt); - if (error) + error = may_linkat(&old_path); + if (unlikely(error)) goto out_dput; error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) - goto out_drop_write; + goto out_dput; error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); -out_drop_write: - mnt_drop_write(new_path.mnt); out_dput: - dput(new_dentry); - mutex_unlock(&new_path.dentry->d_inode->i_mutex); - path_put(&new_path); + done_path_create(&new_path, new_dentry); out: path_put(&old_path); @@ -3644,6 +3757,10 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, if (newnd.last_type != LAST_NORM) goto exit2; + error = mnt_want_write(oldnd.path.mnt); + if (error) + goto exit2; + oldnd.flags &= ~LOOKUP_PARENT; newnd.flags &= ~LOOKUP_PARENT; newnd.flags |= LOOKUP_RENAME_TARGET; @@ -3679,23 +3796,19 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, if (new_dentry == trap) goto exit5; - error = mnt_want_write(oldnd.path.mnt); - if (error) - goto exit5; error = security_path_rename(&oldnd.path, old_dentry, &newnd.path, new_dentry); if (error) - goto exit6; + goto exit5; error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); -exit6: - mnt_drop_write(oldnd.path.mnt); exit5: dput(new_dentry); exit4: dput(old_dentry); exit3: unlock_rename(new_dir, old_dir); + mnt_drop_write(oldnd.path.mnt); exit2: path_put(&newnd.path); putname(to); diff --git a/fs/namespace.c b/fs/namespace.c index c53d3381b0d..4d31f73e256 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -283,24 +283,22 @@ static int mnt_is_readonly(struct vfsmount *mnt) } /* - * Most r/o checks on a fs are for operations that take - * discrete amounts of time, like a write() or unlink(). - * We must keep track of when those operations start - * (for permission checks) and when they end, so that - * we can determine when writes are able to occur to - * a filesystem. + * Most r/o & frozen checks on a fs are for operations that take discrete + * amounts of time, like a write() or unlink(). We must keep track of when + * those operations start (for permission checks) and when they end, so that we + * can determine when writes are able to occur to a filesystem. */ /** - * mnt_want_write - get write access to a mount + * __mnt_want_write - get write access to a mount without freeze protection * @m: the mount on which to take a write * - * This tells the low-level filesystem that a write is - * about to be performed to it, and makes sure that - * writes are allowed before returning success. When - * the write operation is finished, mnt_drop_write() - * must be called. This is effectively a refcount. + * This tells the low-level filesystem that a write is about to be performed to + * it, and makes sure that writes are allowed (mnt it read-write) before + * returning success. This operation does not protect against filesystem being + * frozen. When the write operation is finished, __mnt_drop_write() must be + * called. This is effectively a refcount. */ -int mnt_want_write(struct vfsmount *m) +int __mnt_want_write(struct vfsmount *m) { struct mount *mnt = real_mount(m); int ret = 0; @@ -326,6 +324,27 @@ int mnt_want_write(struct vfsmount *m) ret = -EROFS; } preempt_enable(); + + return ret; +} + +/** + * mnt_want_write - get write access to a mount + * @m: the mount on which to take a write + * + * This tells the low-level filesystem that a write is about to be performed to + * it, and makes sure that writes are allowed (mount is read-write, filesystem + * is not frozen) before returning success. When the write operation is + * finished, mnt_drop_write() must be called. This is effectively a refcount. + */ +int mnt_want_write(struct vfsmount *m) +{ + int ret; + + sb_start_write(m->mnt_sb); + ret = __mnt_want_write(m); + if (ret) + sb_end_write(m->mnt_sb); return ret; } EXPORT_SYMBOL_GPL(mnt_want_write); @@ -355,38 +374,76 @@ int mnt_clone_write(struct vfsmount *mnt) EXPORT_SYMBOL_GPL(mnt_clone_write); /** - * mnt_want_write_file - get write access to a file's mount + * __mnt_want_write_file - get write access to a file's mount * @file: the file who's mount on which to take a write * - * This is like mnt_want_write, but it takes a file and can + * This is like __mnt_want_write, but it takes a file and can * do some optimisations if the file is open for write already */ -int mnt_want_write_file(struct file *file) +int __mnt_want_write_file(struct file *file) { struct inode *inode = file->f_dentry->d_inode; + if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) - return mnt_want_write(file->f_path.mnt); + return __mnt_want_write(file->f_path.mnt); else return mnt_clone_write(file->f_path.mnt); } + +/** + * mnt_want_write_file - get write access to a file's mount + * @file: the file who's mount on which to take a write + * + * This is like mnt_want_write, but it takes a file and can + * do some optimisations if the file is open for write already + */ +int mnt_want_write_file(struct file *file) +{ + int ret; + + sb_start_write(file->f_path.mnt->mnt_sb); + ret = __mnt_want_write_file(file); + if (ret) + sb_end_write(file->f_path.mnt->mnt_sb); + return ret; +} EXPORT_SYMBOL_GPL(mnt_want_write_file); /** - * mnt_drop_write - give up write access to a mount + * __mnt_drop_write - give up write access to a mount * @mnt: the mount on which to give up write access * * Tells the low-level filesystem that we are done * performing writes to it. Must be matched with - * mnt_want_write() call above. + * __mnt_want_write() call above. */ -void mnt_drop_write(struct vfsmount *mnt) +void __mnt_drop_write(struct vfsmount *mnt) { preempt_disable(); mnt_dec_writers(real_mount(mnt)); preempt_enable(); } + +/** + * mnt_drop_write - give up write access to a mount + * @mnt: the mount on which to give up write access + * + * Tells the low-level filesystem that we are done performing writes to it and + * also allows filesystem to be frozen again. Must be matched with + * mnt_want_write() call above. + */ +void mnt_drop_write(struct vfsmount *mnt) +{ + __mnt_drop_write(mnt); + sb_end_write(mnt->mnt_sb); +} EXPORT_SYMBOL_GPL(mnt_drop_write); +void __mnt_drop_write_file(struct file *file) +{ + __mnt_drop_write(file->f_path.mnt); +} + void mnt_drop_write_file(struct file *file) { mnt_drop_write(file->f_path.mnt); diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 404c6a8ac39..db7ad719628 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -30,7 +30,7 @@ config NFS_FS If unsure, say N. config NFS_V2 - bool "NFS client support for NFS version 2" + tristate "NFS client support for NFS version 2" depends on NFS_FS default y help @@ -40,7 +40,7 @@ config NFS_V2 If unsure, say Y. config NFS_V3 - bool "NFS client support for NFS version 3" + tristate "NFS client support for NFS version 3" depends on NFS_FS default y help @@ -72,7 +72,7 @@ config NFS_V3_ACL If unsure, say N. config NFS_V4 - bool "NFS client support for NFS version 4" + tristate "NFS client support for NFS version 4" depends on NFS_FS select SUNRPC_GSS select KEYS @@ -86,6 +86,14 @@ config NFS_V4 If unsure, say Y. +config NFS_SWAP + bool "Provide swap over NFS support" + default n + depends on NFS_FS + select SUNRPC_SWAP + help + This option enables swapon to work on files located on NFS mounts. + config NFS_V4_1 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" depends on NFS_V4 && EXPERIMENTAL diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 0b96c203834..8bf3a3f6925 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,21 +9,22 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ write.o namespace.o mount_clnt.o \ dns_resolve.o cache_lib.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o -nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o -nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o -nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o -nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ - nfs4super.o nfs4file.o delegation.o idmap.o \ - callback.o callback_xdr.o callback_proc.o \ - nfs4namespace.o nfs4getroot.o nfs4client.o -nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o +nfs-$(CONFIG_SYSCTL) += sysctl.o +nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o -ifeq ($(CONFIG_SYSCTL), y) -nfs-y += sysctl.o -nfs-$(CONFIG_NFS_V4) += nfs4sysctl.o -endif +obj-$(CONFIG_NFS_V2) += nfs2.o +nfs2-y := nfs2super.o proc.o nfs2xdr.o -nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o +obj-$(CONFIG_NFS_V3) += nfs3.o +nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o +nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o + +obj-$(CONFIG_NFS_V4) += nfs4.o +nfs4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ + delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ + nfs4namespace.o nfs4getroot.o nfs4client.o +nfs4-$(CONFIG_SYSCTL) += nfs4sysctl.o +nfs4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 7ae8a608956..dd392ed5f2e 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -228,6 +228,14 @@ bl_end_par_io_read(void *data, int unused) schedule_work(&rdata->task.u.tk_work); } +static bool +bl_check_alignment(u64 offset, u32 len, unsigned long blkmask) +{ + if ((offset & blkmask) || (len & blkmask)) + return false; + return true; +} + static enum pnfs_try_status bl_read_pagelist(struct nfs_read_data *rdata) { @@ -244,6 +252,9 @@ bl_read_pagelist(struct nfs_read_data *rdata) dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); + if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK)) + goto use_mds; + par = alloc_parallel(rdata); if (!par) goto use_mds; @@ -552,7 +563,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, last_isect = 0, extent_length = 0; - struct parallel_io *par; + struct parallel_io *par = NULL; loff_t offset = wdata->args.offset; size_t count = wdata->args.count; struct page **pages = wdata->args.pages; @@ -563,6 +574,10 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); + /* Check for alignment first */ + if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK)) + goto out_mds; + /* At this point, wdata->pages is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error * to have it redone using nfs. @@ -996,14 +1011,32 @@ bl_clear_layoutdriver(struct nfs_server *server) return 0; } +static void +bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) + nfs_pageio_reset_read_mds(pgio); + else + pnfs_generic_pg_init_read(pgio, req); +} + +static void +bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) + nfs_pageio_reset_write_mds(pgio); + else + pnfs_generic_pg_init_write(pgio, req); +} + static const struct nfs_pageio_ops bl_pg_read_ops = { - .pg_init = pnfs_generic_pg_init_read, + .pg_init = bl_pg_init_read, .pg_test = pnfs_generic_pg_test, .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops bl_pg_write_ops = { - .pg_init = pnfs_generic_pg_init_write, + .pg_init = bl_pg_init_write, .pg_test = pnfs_generic_pg_test, .pg_doio = pnfs_generic_pg_writepages, }; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 23ff18fe080..4c8459e5bde 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -37,31 +37,7 @@ static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1]; static DEFINE_MUTEX(nfs_callback_mutex); static struct svc_program nfs4_callback_program; -unsigned int nfs_callback_set_tcpport; -unsigned short nfs_callback_tcpport; unsigned short nfs_callback_tcpport6; -#define NFS_CALLBACK_MAXPORTNR (65535U) - -static int param_set_portnr(const char *val, const struct kernel_param *kp) -{ - unsigned long num; - int ret; - - if (!val) - return -EINVAL; - ret = strict_strtoul(val, 0, &num); - if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) - return -EINVAL; - *((unsigned int *)kp->arg) = num; - return 0; -} -static struct kernel_param_ops param_ops_portnr = { - .set = param_set_portnr, - .get = param_get_uint, -}; -#define param_check_portnr(name, p) __param_check(name, p, unsigned int); - -module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); /* * This is the NFSv4 callback kernel thread. @@ -265,6 +241,10 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) ret = -ENOMEM; goto out_err; } + /* As there is only one thread we need to over-ride the + * default maximum of 80 connections + */ + serv->sv_maxconn = 1024; ret = svc_bind(serv, net); if (ret < 0) { diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index a5527c90a5a..b44d7b128b7 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -192,7 +192,7 @@ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_process_state *cps); extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, struct cb_process_state *cps); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); extern void nfs_callback_down(int minorversion); extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 65afa382c5e..9fc0d9dfc91 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -51,25 +51,23 @@ #include "internal.h" #include "fscache.h" #include "pnfs.h" +#include "nfs.h" #include "netns.h" #define NFSDBG_FACILITY NFSDBG_CLIENT static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); +static DEFINE_SPINLOCK(nfs_version_lock); +static DEFINE_MUTEX(nfs_version_mutex); +static LIST_HEAD(nfs_versions); /* * RPC cruft for NFS */ static const struct rpc_version *nfs_version[5] = { -#ifdef CONFIG_NFS_V2 - [2] = &nfs_version2, -#endif -#ifdef CONFIG_NFS_V3 - [3] = &nfs_version3, -#endif -#ifdef CONFIG_NFS_V4 - [4] = &nfs_version4, -#endif + [2] = NULL, + [3] = NULL, + [4] = NULL, }; const struct rpc_program nfs_program = { @@ -85,21 +83,64 @@ struct rpc_stat nfs_rpcstat = { .program = &nfs_program }; +static struct nfs_subversion *find_nfs_version(unsigned int version) +{ + struct nfs_subversion *nfs; + spin_lock(&nfs_version_lock); -#ifdef CONFIG_NFS_V3_ACL -static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static const struct rpc_version *nfsacl_version[] = { - [3] = &nfsacl_version3, -}; + list_for_each_entry(nfs, &nfs_versions, list) { + if (nfs->rpc_ops->version == version) { + spin_unlock(&nfs_version_lock); + return nfs; + } + }; -const struct rpc_program nfsacl_program = { - .name = "nfsacl", - .number = NFS_ACL_PROGRAM, - .nrvers = ARRAY_SIZE(nfsacl_version), - .version = nfsacl_version, - .stats = &nfsacl_rpcstat, -}; -#endif /* CONFIG_NFS_V3_ACL */ + spin_unlock(&nfs_version_lock); + return ERR_PTR(-EPROTONOSUPPORT);; +} + +struct nfs_subversion *get_nfs_version(unsigned int version) +{ + struct nfs_subversion *nfs = find_nfs_version(version); + + if (IS_ERR(nfs)) { + mutex_lock(&nfs_version_mutex); + request_module("nfs%d", version); + nfs = find_nfs_version(version); + mutex_unlock(&nfs_version_mutex); + } + + if (!IS_ERR(nfs)) + try_module_get(nfs->owner); + return nfs; +} + +void put_nfs_version(struct nfs_subversion *nfs) +{ + module_put(nfs->owner); +} + +void register_nfs_version(struct nfs_subversion *nfs) +{ + spin_lock(&nfs_version_lock); + + list_add(&nfs->list, &nfs_versions); + nfs_version[nfs->rpc_ops->version] = nfs->rpc_vers; + + spin_unlock(&nfs_version_lock); +} +EXPORT_SYMBOL_GPL(register_nfs_version); + +void unregister_nfs_version(struct nfs_subversion *nfs) +{ + spin_lock(&nfs_version_lock); + + nfs_version[nfs->rpc_ops->version] = NULL; + list_del(&nfs->list); + + spin_unlock(&nfs_version_lock); +} +EXPORT_SYMBOL_GPL(unregister_nfs_version); /* * Allocate a shared client record @@ -116,7 +157,10 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; - clp->rpc_ops = cl_init->rpc_ops; + clp->cl_nfs_mod = cl_init->nfs_mod; + try_module_get(clp->cl_nfs_mod->owner); + + clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; atomic_set(&clp->cl_count, 1); clp->cl_cons_state = NFS_CS_INITING; @@ -145,12 +189,14 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) return clp; error_cleanup: + put_nfs_version(clp->cl_nfs_mod); kfree(clp); error_0: return ERR_PTR(err); } +EXPORT_SYMBOL_GPL(nfs_alloc_client); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ void nfs_cleanup_cb_ident_idr(struct net *net) { @@ -205,11 +251,13 @@ void nfs_free_client(struct nfs_client *clp) put_rpccred(clp->cl_machine_cred); put_net(clp->cl_net); + put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); kfree(clp); dprintk("<-- nfs_free_client()\n"); } +EXPORT_SYMBOL_GPL(nfs_free_client); /* * Release a reference to a shared client record @@ -324,6 +372,7 @@ int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, } return 0; } +EXPORT_SYMBOL_GPL(nfs_sockaddr_match_ipaddr); #endif /* CONFIG_NFS_V4_1 */ /* @@ -362,7 +411,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat continue; /* Different NFS versions cannot share the same nfs_client */ - if (clp->rpc_ops != data->rpc_ops) + if (clp->rpc_ops != data->nfs_mod->rpc_ops) continue; if (clp->cl_proto != data->proto) @@ -390,6 +439,7 @@ int nfs_wait_client_init_complete(const struct nfs_client *clp) return wait_event_killable(nfs_client_active_wq, nfs_client_init_is_complete(clp)); } +EXPORT_SYMBOL_GPL(nfs_wait_client_init_complete); /* * Found an existing client. Make sure it's ready before returning. @@ -431,9 +481,10 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, { struct nfs_client *clp, *new = NULL; struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); + const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops; dprintk("--> nfs_get_client(%s,v%u)\n", - cl_init->hostname ?: "", cl_init->rpc_ops->version); + cl_init->hostname ?: "", rpc_ops->version); /* see if the client already exists */ do { @@ -450,20 +501,20 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, list_add(&new->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); new->cl_flags = cl_init->init_flags; - return cl_init->rpc_ops->init_client(new, - timeparms, ip_addr, - authflavour); + return rpc_ops->init_client(new, timeparms, ip_addr, + authflavour); } spin_unlock(&nn->nfs_client_lock); - new = cl_init->rpc_ops->alloc_client(cl_init); + new = rpc_ops->alloc_client(cl_init); } while (!IS_ERR(new)); dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", cl_init->hostname ?: "", PTR_ERR(new)); return new; } +EXPORT_SYMBOL_GPL(nfs_get_client); /* * Mark a server as ready or failed @@ -474,6 +525,7 @@ void nfs_mark_client_ready(struct nfs_client *clp, int state) clp->cl_cons_state = state; wake_up_all(&nfs_client_active_wq); } +EXPORT_SYMBOL_GPL(nfs_mark_client_ready); /* * Initialise the timeout values for a connection @@ -515,6 +567,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, BUG(); } } +EXPORT_SYMBOL_GPL(nfs_init_timeout_values); /* * Create an RPC client handle @@ -554,6 +607,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, clp->cl_rpcclient = clnt; return 0; } +EXPORT_SYMBOL_GPL(nfs_create_rpc_client); /* * Version 2 or 3 client destruction @@ -606,36 +660,6 @@ static int nfs_start_lockd(struct nfs_server *server) } /* - * Initialise an NFSv3 ACL client connection - */ -#ifdef CONFIG_NFS_V3_ACL -static void nfs_init_server_aclclient(struct nfs_server *server) -{ - if (server->nfs_client->rpc_ops->version != 3) - goto out_noacl; - if (server->flags & NFS_MOUNT_NOACL) - goto out_noacl; - - server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); - if (IS_ERR(server->client_acl)) - goto out_noacl; - - /* No errors! Assume that Sun nfsacls are supported */ - server->caps |= NFS_CAP_ACLS; - return; - -out_noacl: - server->caps &= ~NFS_CAP_ACLS; -} -#else -static inline void nfs_init_server_aclclient(struct nfs_server *server) -{ - server->flags &= ~NFS_MOUNT_NOACL; - server->caps &= ~NFS_CAP_ACLS; -} -#endif - -/* * Create a general RPC client */ int nfs_init_server_rpcclient(struct nfs_server *server, @@ -670,6 +694,7 @@ int nfs_init_server_rpcclient(struct nfs_server *server, return 0; } +EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient); /** * nfs_init_client - Initialise an NFS2 or NFS3 client @@ -709,18 +734,20 @@ error: dprintk("<-- nfs_init_client() = xerror %d\n", error); return ERR_PTR(error); } +EXPORT_SYMBOL_GPL(nfs_init_client); /* * Create a version 2 or 3 client */ static int nfs_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data *data) + const struct nfs_parsed_mount_data *data, + struct nfs_subversion *nfs_mod) { struct nfs_client_initdata cl_init = { .hostname = data->nfs_server.hostname, .addr = (const struct sockaddr *)&data->nfs_server.address, .addrlen = data->nfs_server.addrlen, - .rpc_ops = NULL, + .nfs_mod = nfs_mod, .proto = data->nfs_server.protocol, .net = data->net, }; @@ -730,21 +757,6 @@ static int nfs_init_server(struct nfs_server *server, dprintk("--> nfs_init_server()\n"); - switch (data->version) { -#ifdef CONFIG_NFS_V2 - case 2: - cl_init.rpc_ops = &nfs_v2_clientops; - break; -#endif -#ifdef CONFIG_NFS_V3 - case 3: - cl_init.rpc_ops = &nfs_v3_clientops; - break; -#endif - default: - return -EPROTONOSUPPORT; - } - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, data->timeo, data->retrans); if (data->flags & NFS_MOUNT_NORESVPORT) @@ -798,8 +810,6 @@ static int nfs_init_server(struct nfs_server *server, server->mountd_protocol = data->mount_server.protocol; server->namelen = data->namlen; - /* Create a client RPC handle for the NFSv3 ACL management interface */ - nfs_init_server_aclclient(server); dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp); return 0; @@ -911,6 +921,7 @@ out_error: dprintk("nfs_probe_fsinfo: error = %d\n", -error); return error; } +EXPORT_SYMBOL_GPL(nfs_probe_fsinfo); /* * Copy useful information when duplicating a server record @@ -927,6 +938,7 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour target->caps = source->caps; target->options = source->options; } +EXPORT_SYMBOL_GPL(nfs_server_copy_userdata); void nfs_server_insert_lists(struct nfs_server *server) { @@ -940,6 +952,7 @@ void nfs_server_insert_lists(struct nfs_server *server) spin_unlock(&nn->nfs_client_lock); } +EXPORT_SYMBOL_GPL(nfs_server_insert_lists); static void nfs_server_remove_lists(struct nfs_server *server) { @@ -999,6 +1012,7 @@ struct nfs_server *nfs_alloc_server(void) return server; } +EXPORT_SYMBOL_GPL(nfs_alloc_server); /* * Free up a server record @@ -1027,13 +1041,14 @@ void nfs_free_server(struct nfs_server *server) nfs_release_automount_timer(); dprintk("<-- nfs_free_server()\n"); } +EXPORT_SYMBOL_GPL(nfs_free_server); /* * Create a version 2 or 3 volume record * - keyed on server and FSID */ -struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh) +struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { struct nfs_server *server; struct nfs_fattr *fattr; @@ -1049,7 +1064,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, goto error; /* Get a client representation */ - error = nfs_init_server(server, data); + error = nfs_init_server(server, mount_info->parsed, nfs_mod); if (error < 0) goto error; @@ -1058,13 +1073,13 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); /* Probe the root fh to retrieve its FSID */ - error = nfs_probe_fsinfo(server, mntfh, fattr); + error = nfs_probe_fsinfo(server, mount_info->mntfh, fattr); if (error < 0) goto error; if (server->nfs_client->rpc_ops->version == 3) { if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; - if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) + if (!(mount_info->parsed->flags & NFS_MOUNT_NORDIRPLUS)) server->caps |= NFS_CAP_READDIRPLUS; } else { if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) @@ -1072,7 +1087,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, } if (!(fattr->valid & NFS_ATTR_FATTR)) { - error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr); + error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr); if (error < 0) { dprintk("nfs_create_server: getattr error = %d\n", -error); goto error; @@ -1094,6 +1109,7 @@ error: nfs_free_server(server); return ERR_PTR(error); } +EXPORT_SYMBOL_GPL(nfs_create_server); /* * Clone an NFS2, NFS3 or NFS4 server record @@ -1133,8 +1149,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, flavor); if (error < 0) goto out_free_server; - if (!IS_ERR(source->client_acl)) - nfs_init_server_aclclient(server); /* probe the filesystem info for this server filesystem */ error = nfs_probe_fsinfo(server, fh, fattr_fsinfo); @@ -1165,6 +1179,7 @@ out_free_server: dprintk("<-- nfs_clone_server() = error %d\n", error); return ERR_PTR(error); } +EXPORT_SYMBOL_GPL(nfs_clone_server); void nfs_clients_init(struct net *net) { @@ -1172,7 +1187,7 @@ void nfs_clients_init(struct net *net) INIT_LIST_HEAD(&nn->nfs_client_list); INIT_LIST_HEAD(&nn->nfs_volume_list); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) idr_init(&nn->cb_ident_idr); #endif spin_lock_init(&nn->nfs_client_lock); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 1f3ccd93463..bbc6a4dba0d 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -8,7 +8,7 @@ #ifndef FS_NFS_DELEGATION_H #define FS_NFS_DELEGATION_H -#if defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V4) /* * NFSv4 delegation */ diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d49f1b9cd3f..627f108ede2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -17,6 +17,7 @@ * 6 Jun 1999 Cache readdir lookups in the page cache. -DaveM */ +#include <linux/module.h> #include <linux/time.h> #include <linux/errno.h> #include <linux/stat.h> @@ -935,6 +936,7 @@ void nfs_force_lookup_revalidate(struct inode *dir) { NFS_I(dir)->cache_change_attribute++; } +EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate); /* * A check for whether or not the parent directory has changed. @@ -1196,6 +1198,7 @@ const struct dentry_operations nfs_dentry_operations = { .d_automount = nfs_d_automount, .d_release = nfs_d_release, }; +EXPORT_SYMBOL_GPL(nfs_dentry_operations); struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { @@ -1263,8 +1266,9 @@ out: nfs_free_fhandle(fhandle); return res; } +EXPORT_SYMBOL_GPL(nfs_lookup); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static int nfs4_lookup_revalidate(struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { @@ -1274,6 +1278,7 @@ const struct dentry_operations nfs4_dentry_operations = { .d_automount = nfs_d_automount, .d_release = nfs_d_release, }; +EXPORT_SYMBOL_GPL(nfs4_dentry_operations); static fmode_t flags_to_mode(int flags) { @@ -1416,6 +1421,7 @@ no_open: return finish_no_open(file, res); } +EXPORT_SYMBOL_GPL(nfs_atomic_open); static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) { @@ -1508,6 +1514,7 @@ out_error: dput(parent); return error; } +EXPORT_SYMBOL_GPL(nfs_instantiate); /* * Following a failed create operation, we drop the dentry rather @@ -1536,6 +1543,7 @@ out_err: d_drop(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_create); /* * See comments for nfs_proc_create regarding failed operations. @@ -1563,6 +1571,7 @@ out_err: d_drop(dentry); return status; } +EXPORT_SYMBOL_GPL(nfs_mknod); /* * See comments for nfs_proc_create regarding failed operations. @@ -1586,6 +1595,7 @@ out_err: d_drop(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_mkdir); static void nfs_dentry_handle_enoent(struct dentry *dentry) { @@ -1609,6 +1619,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) return error; } +EXPORT_SYMBOL_GPL(nfs_rmdir); /* * Remove a file after making sure there are no pending writes, @@ -1680,6 +1691,7 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) d_rehash(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_unlink); /* * To create a symbolic link, most file systems instantiate a new inode, @@ -1750,6 +1762,7 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) return 0; } +EXPORT_SYMBOL_GPL(nfs_symlink); int nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) @@ -1771,6 +1784,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) } return error; } +EXPORT_SYMBOL_GPL(nfs_link); /* * RENAME @@ -1869,6 +1883,7 @@ out: dput(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_rename); static DEFINE_SPINLOCK(nfs_access_lru_lock); static LIST_HEAD(nfs_access_lru_list); @@ -1969,6 +1984,7 @@ void nfs_access_zap_cache(struct inode *inode) spin_unlock(&nfs_access_lru_lock); nfs_access_free_list(&head); } +EXPORT_SYMBOL_GPL(nfs_access_zap_cache); static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred) { @@ -2129,6 +2145,7 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) { return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); } +EXPORT_SYMBOL_GPL(nfs_may_open); int nfs_permission(struct inode *inode, int mask) { @@ -2188,6 +2205,7 @@ out_notsup: res = generic_permission(inode, mask); goto out; } +EXPORT_SYMBOL_GPL(nfs_permission); /* * Local variables: diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 42dce909ec7..1ba385b7c90 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -115,17 +115,28 @@ static inline int put_dreq(struct nfs_direct_req *dreq) * @nr_segs: size of iovec array * * The presence of this routine in the address space ops vector means - * the NFS client supports direct I/O. However, we shunt off direct - * read and write requests before the VFS gets them, so this method - * should never be called. + * the NFS client supports direct I/O. However, for most direct IO, we + * shunt off direct read and write requests before the VFS gets them, + * so this method is only ever called for swap. */ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) { +#ifndef CONFIG_NFS_SWAP dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", iocb->ki_filp->f_path.dentry->d_name.name, (long long) pos, nr_segs); return -EINVAL; +#else + VM_BUG_ON(iocb->ki_left != PAGE_SIZE); + VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); + + if (rw == READ || rw == KERNEL_READ) + return nfs_file_direct_read(iocb, iov, nr_segs, pos, + rw == READ ? true : false); + return nfs_file_direct_write(iocb, iov, nr_segs, pos, + rw == WRITE ? true : false); +#endif /* CONFIG_NFS_SWAP */ } static void nfs_direct_release_pages(struct page **pages, unsigned int npages) @@ -303,7 +314,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { */ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, - loff_t pos) + loff_t pos, bool uio) { struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; @@ -331,12 +342,20 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de GFP_KERNEL); if (!pagevec) break; - down_read(¤t->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, + if (uio) { + down_read(¤t->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, npages, 1, 0, pagevec, NULL); - up_read(¤t->mm->mmap_sem); - if (result < 0) - break; + up_read(¤t->mm->mmap_sem); + if (result < 0) + break; + } else { + WARN_ON(npages != 1); + result = get_kernel_page(user_addr, 1, pagevec); + if (WARN_ON(result != 1)) + break; + } + if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { @@ -386,7 +405,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, const struct iovec *iov, unsigned long nr_segs, - loff_t pos) + loff_t pos, bool uio) { struct nfs_pageio_descriptor desc; ssize_t result = -EINVAL; @@ -400,7 +419,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_read_schedule_segment(&desc, vec, pos); + result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio); if (result < 0) break; requested_bytes += result; @@ -426,7 +445,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, } static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; @@ -444,7 +463,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); + result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); if (!result) result = nfs_direct_wait(dreq); NFS_I(inode)->read_io += result; @@ -460,7 +479,7 @@ static void nfs_inode_dio_write_done(struct inode *inode) inode_dio_done(inode); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; @@ -610,7 +629,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode */ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, - loff_t pos) + loff_t pos, bool uio) { struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; @@ -638,12 +657,19 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d if (!pagevec) break; - down_read(¤t->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - npages, 0, 0, pagevec, NULL); - up_read(¤t->mm->mmap_sem); - if (result < 0) - break; + if (uio) { + down_read(¤t->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + npages, 0, 0, pagevec, NULL); + up_read(¤t->mm->mmap_sem); + if (result < 0) + break; + } else { + WARN_ON(npages != 1); + result = get_kernel_page(user_addr, 0, pagevec); + if (WARN_ON(result != 1)) + break; + } if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; @@ -774,7 +800,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, const struct iovec *iov, unsigned long nr_segs, - loff_t pos) + loff_t pos, bool uio) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; @@ -790,7 +816,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_write_schedule_segment(&desc, vec, pos); + result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); if (result < 0) break; requested_bytes += result; @@ -818,7 +844,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos, - size_t count) + size_t count, bool uio) { ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; @@ -836,7 +862,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos); + result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); if (!result) result = nfs_direct_wait(dreq); out_release: @@ -867,7 +893,7 @@ out: * cache. */ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; @@ -892,7 +918,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, task_io_account_read(count); - retval = nfs_direct_read(iocb, iov, nr_segs, pos); + retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio); if (retval > 0) iocb->ki_pos = pos + retval; @@ -923,7 +949,7 @@ out: * is no atomic O_APPEND write facility in the NFS protocol. */ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; @@ -955,7 +981,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, task_io_account_write(count); - retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); + retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio); if (retval > 0) { struct inode *inode = mapping->host; diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index b3924b8a600..31c26c4dcc2 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -8,6 +8,7 @@ #ifdef CONFIG_NFS_USE_KERNEL_DNS +#include <linux/module.h> #include <linux/sunrpc/clnt.h> #include <linux/dns_resolver.h> #include "dns_resolve.h" @@ -27,9 +28,11 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, kfree(ip_addr); return ret; } +EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); #else +#include <linux/module.h> #include <linux/hash.h> #include <linux/string.h> #include <linux/kmod.h> @@ -345,6 +348,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, ret = -ESRCH; return ret; } +EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); int nfs_dns_resolver_cache_init(struct net *net) { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 70d124a61b9..75d6d0a3d32 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -16,6 +16,7 @@ * nfs regular file handling functions */ +#include <linux/module.h> #include <linux/time.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -52,6 +53,7 @@ int nfs_check_flags(int flags) return 0; } +EXPORT_SYMBOL_GPL(nfs_check_flags); /* * Open file @@ -84,6 +86,7 @@ nfs_file_release(struct inode *inode, struct file *filp) nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return nfs_release(inode, filp); } +EXPORT_SYMBOL_GPL(nfs_file_release); /** * nfs_revalidate_size - Revalidate the file size @@ -137,6 +140,7 @@ loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) return generic_file_llseek(filp, offset, origin); } +EXPORT_SYMBOL_GPL(nfs_file_llseek); /* * Flush all dirty pages, and check for write errors. @@ -165,6 +169,7 @@ nfs_file_flush(struct file *file, fl_owner_t id) /* Flush writes to the server and return any errors */ return vfs_fsync(file, 0); } +EXPORT_SYMBOL_GPL(nfs_file_flush); ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, @@ -175,7 +180,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, ssize_t result; if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_read(iocb, iov, nr_segs, pos); + return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); dprintk("NFS: read(%s/%s, %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -189,6 +194,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, } return result; } +EXPORT_SYMBOL_GPL(nfs_file_read); ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos, @@ -211,6 +217,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, } return res; } +EXPORT_SYMBOL_GPL(nfs_file_splice_read); int nfs_file_mmap(struct file * file, struct vm_area_struct * vma) @@ -232,6 +239,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) } return status; } +EXPORT_SYMBOL_GPL(nfs_file_mmap); /* * Flush any dirty pages for this process, and check for write errors. @@ -270,6 +278,7 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) ret = status; return ret; } +EXPORT_SYMBOL_GPL(nfs_file_fsync_commit); static int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) @@ -430,7 +439,7 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) if (offset != 0) return; /* Cancel any unstarted writes on this page */ - nfs_wb_page_cancel(page->mapping->host, page); + nfs_wb_page_cancel(page_file_mapping(page)->host, page); nfs_fscache_invalidate_page(page, page->mapping->host); } @@ -447,8 +456,11 @@ static int nfs_release_page(struct page *page, gfp_t gfp) dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); - /* Only do I/O if gfp is a superset of GFP_KERNEL */ - if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) { + /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not + * doing this memory reclaim for a fs-related allocation. + */ + if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL && + !(current->flags & PF_FSTRANS)) { int how = FLUSH_SYNC; /* Don't let kswapd deadlock waiting for OOM RPC calls */ @@ -472,7 +484,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp) */ static int nfs_launder_page(struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_inode *nfsi = NFS_I(inode); dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", @@ -482,6 +494,20 @@ static int nfs_launder_page(struct page *page) return nfs_wb_page(inode, page); } +#ifdef CONFIG_NFS_SWAP +static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, + sector_t *span) +{ + *span = sis->pages; + return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1); +} + +static void nfs_swap_deactivate(struct file *file) +{ + xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0); +} +#endif + const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, @@ -496,6 +522,10 @@ const struct address_space_operations nfs_file_aops = { .migratepage = nfs_migrate_page, .launder_page = nfs_launder_page, .error_remove_page = generic_error_remove_page, +#ifdef CONFIG_NFS_SWAP + .swap_activate = nfs_swap_activate, + .swap_deactivate = nfs_swap_deactivate, +#endif }; /* @@ -521,7 +551,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page); lock_page(page); - mapping = page->mapping; + mapping = page_file_mapping(page); if (mapping != dentry->d_inode->i_mapping) goto out_unlock; @@ -570,7 +600,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, size_t count = iov_length(iov, nr_segs); if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, iov, nr_segs, pos); + return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); dprintk("NFS: write(%s/%s, %lu@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -611,6 +641,7 @@ out_swapfile: printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); goto out; } +EXPORT_SYMBOL_GPL(nfs_file_write); ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, struct file *filp, loff_t *ppos, @@ -642,6 +673,7 @@ ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); return ret; } +EXPORT_SYMBOL_GPL(nfs_file_splice_write); static int do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) @@ -802,6 +834,7 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) out_err: return ret; } +EXPORT_SYMBOL_GPL(nfs_lock); /* * Lock a (portion of) a file @@ -819,6 +852,15 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; + /* + * The NFSv4 protocol doesn't support LOCK_MAND, which is not part of + * any standard. In principle we might be able to support LOCK_MAND + * on NFSv2/3 since NLMv3/4 support DOS share modes, but for now the + * NFS code is not set up for it. + */ + if (fl->fl_type & LOCK_MAND) + return -EINVAL; + if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) is_local = 1; @@ -831,6 +873,7 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) return do_unlk(filp, cmd, fl, is_local); return do_setlk(filp, cmd, fl, is_local); } +EXPORT_SYMBOL_GPL(nfs_flock); /* * There is no protocol support for leases, so we have no way to implement @@ -843,6 +886,7 @@ int nfs_setlease(struct file *file, long arg, struct file_lock **fl) file->f_path.dentry->d_name.name, arg); return -EINVAL; } +EXPORT_SYMBOL_GPL(nfs_setlease); const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, @@ -862,3 +906,4 @@ const struct file_operations nfs_file_operations = { .check_flags = nfs_check_flags, .setlease = nfs_setlease, }; +EXPORT_SYMBOL_GPL(nfs_file_operations); diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 864c51e4b40..b701358c39c 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -52,8 +52,6 @@ #define NFS_UINT_MAXLEN 11 -/* Default cache timeout is 10 minutes */ -unsigned int nfs_idmap_cache_timeout = 600; static const struct cred *id_resolver_cache; static struct key_type key_type_id_resolver_legacy; @@ -205,12 +203,18 @@ static int nfs_idmap_init_keyring(void) if (ret < 0) goto failed_put_key; + ret = register_key_type(&key_type_id_resolver_legacy); + if (ret < 0) + goto failed_reg_legacy; + set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; id_resolver_cache = cred; return 0; +failed_reg_legacy: + unregister_key_type(&key_type_id_resolver); failed_put_key: key_put(keyring); failed_put_cred: @@ -222,6 +226,7 @@ static void nfs_idmap_quit_keyring(void) { key_revoke(id_resolver_cache->thread_keyring); unregister_key_type(&key_type_id_resolver); + unregister_key_type(&key_type_id_resolver_legacy); put_cred(id_resolver_cache); } @@ -359,7 +364,6 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ } /* idmap classic begins here */ -module_param(nfs_idmap_cache_timeout, int, 0644); enum { Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err @@ -385,7 +389,7 @@ static const struct rpc_pipe_ops idmap_upcall_ops = { }; static struct key_type key_type_id_resolver_legacy = { - .name = "id_resolver", + .name = "id_legacy", .instantiate = user_instantiate, .match = user_match, .revoke = user_revoke, @@ -674,6 +678,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, if (ret < 0) goto out2; + BUG_ON(idmap->idmap_key_cons != NULL); idmap->idmap_key_cons = cons; ret = rpc_queue_upcall(idmap->idmap_pipe, msg); @@ -687,8 +692,7 @@ out2: out1: kfree(msg); out0: - key_revoke(cons->key); - key_revoke(cons->authkey); + complete_request_key(cons, ret); return ret; } @@ -722,11 +726,18 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); struct idmap *idmap = (struct idmap *)rpci->private; - struct key_construction *cons = idmap->idmap_key_cons; + struct key_construction *cons; struct idmap_msg im; size_t namelen_in; int ret; + /* If instantiation is successful, anyone waiting for key construction + * will have been woken up and someone else may now have used + * idmap_key_cons - so after this point we may no longer touch it. + */ + cons = ACCESS_ONCE(idmap->idmap_key_cons); + idmap->idmap_key_cons = NULL; + if (mlen != sizeof(im)) { ret = -ENOSPC; goto out; @@ -739,7 +750,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { ret = mlen; - complete_request_key(idmap->idmap_key_cons, -ENOKEY); + complete_request_key(cons, -ENOKEY); goto out_incomplete; } @@ -756,7 +767,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) } out: - complete_request_key(idmap->idmap_key_cons, ret); + complete_request_key(cons, ret); out_incomplete: return ret; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 35f7e4bc680..c6e895f0fbf 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -50,6 +50,7 @@ #include "fscache.h" #include "dns_resolve.h" #include "pnfs.h" +#include "nfs.h" #include "netns.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -81,6 +82,7 @@ int nfs_wait_bit_killable(void *word) freezable_schedule(); return 0; } +EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); /** * nfs_compat_user_ino64 - returns the user-visible inode number @@ -105,7 +107,7 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -static void nfs_clear_inode(struct inode *inode) +void nfs_clear_inode(struct inode *inode) { /* * The following should never happen... @@ -116,6 +118,7 @@ static void nfs_clear_inode(struct inode *inode) nfs_access_zap_cache(inode); nfs_fscache_release_inode_cookie(inode); } +EXPORT_SYMBOL_GPL(nfs_clear_inode); void nfs_evict_inode(struct inode *inode) { @@ -185,6 +188,7 @@ void nfs_zap_acl_cache(struct inode *inode) NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_zap_acl_cache); void nfs_invalidate_atime(struct inode *inode) { @@ -192,6 +196,7 @@ void nfs_invalidate_atime(struct inode *inode) NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_invalidate_atime); /* * Invalidate, but do not unhash, the inode. @@ -390,6 +395,7 @@ out_no_inode: dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode)); goto out; } +EXPORT_SYMBOL_GPL(nfs_fhget); #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) @@ -437,6 +443,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) out: return error; } +EXPORT_SYMBOL_GPL(nfs_setattr); /** * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall @@ -495,6 +502,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) nfs_vmtruncate(inode, attr->ia_size); } } +EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -534,6 +542,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) out: return err; } +EXPORT_SYMBOL_GPL(nfs_getattr); static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) { @@ -622,6 +631,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) return; nfs_revalidate_inode(server, inode); } +EXPORT_SYMBOL_GPL(nfs_close_context); struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode) { @@ -648,6 +658,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f ctx->mdsthreshold = NULL; return ctx; } +EXPORT_SYMBOL_GPL(alloc_nfs_open_context); struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { @@ -655,6 +666,7 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) atomic_inc(&ctx->lock_context.count); return ctx; } +EXPORT_SYMBOL_GPL(get_nfs_open_context); static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) { @@ -682,6 +694,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) { __put_nfs_open_context(ctx, 0); } +EXPORT_SYMBOL_GPL(put_nfs_open_context); /* * Ensure that mmap has a recent RPC credential for use when writing out @@ -697,6 +710,7 @@ void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) list_add(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_file_set_open_context); /* * Given an inode, search for an open context with the desired characteristics @@ -841,6 +855,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) return NFS_STALE(inode) ? -ESTALE : 0; return __nfs_revalidate_inode(server, inode); } +EXPORT_SYMBOL_GPL(nfs_revalidate_inode); static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { @@ -882,6 +897,10 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) struct nfs_inode *nfsi = NFS_I(inode); int ret = 0; + /* swapfiles are not supposed to be shared. */ + if (IS_SWAPFILE(inode)) + goto out; + if (nfs_mapping_need_revalidate_inode(inode)) { ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); if (ret < 0) @@ -1027,6 +1046,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr) fattr->owner_name = NULL; fattr->group_name = NULL; } +EXPORT_SYMBOL_GPL(nfs_fattr_init); struct nfs_fattr *nfs_alloc_fattr(void) { @@ -1037,6 +1057,7 @@ struct nfs_fattr *nfs_alloc_fattr(void) nfs_fattr_init(fattr); return fattr; } +EXPORT_SYMBOL_GPL(nfs_alloc_fattr); struct nfs_fh *nfs_alloc_fhandle(void) { @@ -1047,6 +1068,7 @@ struct nfs_fh *nfs_alloc_fhandle(void) fh->size = 0; return fh; } +EXPORT_SYMBOL_GPL(nfs_alloc_fhandle); #ifdef NFS_DEBUG /* @@ -1167,6 +1189,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) return status; } +EXPORT_SYMBOL_GPL(nfs_refresh_inode); static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) { @@ -1203,6 +1226,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) spin_unlock(&inode->i_lock); return status; } +EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); /** * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache @@ -1254,6 +1278,7 @@ out_noforce: spin_unlock(&inode->i_lock); return status; } +EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc); /* * Many nfs protocol calls return the new file attributes after @@ -1471,27 +1496,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) return -ESTALE; } - -#ifdef CONFIG_NFS_V4 - -/* - * Clean out any remaining NFSv4 state that might be left over due - * to open() calls that passed nfs_atomic_lookup, but failed to call - * nfs_open(). - */ -void nfs4_evict_inode(struct inode *inode) -{ - truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); - pnfs_return_layout(inode); - pnfs_destroy_layout(NFS_I(inode)); - /* If we are holding a delegation, return it! */ - nfs_inode_return_delegation_noreclaim(inode); - /* First call standard NFS clear_inode() code */ - nfs_clear_inode(inode); -} -#endif - struct inode *nfs_alloc_inode(struct super_block *sb) { struct nfs_inode *nfsi; @@ -1504,11 +1508,12 @@ struct inode *nfs_alloc_inode(struct super_block *sb) nfsi->acl_access = ERR_PTR(-EAGAIN); nfsi->acl_default = ERR_PTR(-EAGAIN); #endif -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ return &nfsi->vfs_inode; } +EXPORT_SYMBOL_GPL(nfs_alloc_inode); static void nfs_i_callback(struct rcu_head *head) { @@ -1520,10 +1525,11 @@ void nfs_destroy_inode(struct inode *inode) { call_rcu(&inode->i_rcu, nfs_i_callback); } +EXPORT_SYMBOL_GPL(nfs_destroy_inode); static inline void nfs4_init_once(struct nfs_inode *nfsi) { -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) INIT_LIST_HEAD(&nfsi->open_states); nfsi->delegation = NULL; nfsi->delegation_state = 0; @@ -1569,6 +1575,7 @@ static void nfs_destroy_inodecache(void) } struct workqueue_struct *nfsiod_workqueue; +EXPORT_SYMBOL_GPL(nfsiod_workqueue); /* * start up the nfsiod workqueue @@ -1629,94 +1636,80 @@ static int __init init_nfs_fs(void) err = nfs_dns_resolver_init(); if (err < 0) - goto out11; + goto out10;; err = register_pernet_subsys(&nfs_net_ops); if (err < 0) - goto out10; + goto out9; err = nfs_fscache_register(); if (err < 0) - goto out9; + goto out8; err = nfsiod_start(); if (err) - goto out8; + goto out7; err = nfs_fs_proc_init(); if (err) - goto out7; + goto out6; err = nfs_init_nfspagecache(); if (err) - goto out6; + goto out5; err = nfs_init_inodecache(); if (err) - goto out5; + goto out4; err = nfs_init_readpagecache(); if (err) - goto out4; + goto out3; err = nfs_init_writepagecache(); if (err) - goto out3; + goto out2; err = nfs_init_directcache(); if (err) - goto out2; + goto out1; #ifdef CONFIG_PROC_FS rpc_proc_register(&init_net, &nfs_rpcstat); #endif - -#ifdef CONFIG_NFS_V4 - err = init_nfs_v4(); - if (err) - goto out1; -#endif - if ((err = register_nfs_fs()) != 0) goto out0; return 0; out0: -#ifdef CONFIG_NFS_V4 - exit_nfs_v4(); -out1: -#endif #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif nfs_destroy_directcache(); -out2: +out1: nfs_destroy_writepagecache(); -out3: +out2: nfs_destroy_readpagecache(); -out4: +out3: nfs_destroy_inodecache(); -out5: +out4: nfs_destroy_nfspagecache(); -out6: +out5: nfs_fs_proc_exit(); -out7: +out6: nfsiod_stop(); -out8: +out7: nfs_fscache_unregister(); -out9: +out8: unregister_pernet_subsys(&nfs_net_ops); -out10: +out9: nfs_dns_resolver_destroy(); -out11: +out10: return err; } static void __exit exit_nfs_fs(void) { -#ifdef CONFIG_NFS_V4 - exit_nfs_v4(); -#endif nfs_destroy_directcache(); nfs_destroy_writepagecache(); nfs_destroy_readpagecache(); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index cfafd13b6fe..31fdb03225c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -90,7 +90,7 @@ struct nfs_client_initdata { const char *hostname; const struct sockaddr *addr; size_t addrlen; - const struct nfs_rpc_ops *rpc_ops; + struct nfs_subversion *nfs_mod; int proto; u32 minorversion; struct net *net; @@ -187,12 +187,11 @@ extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *); -extern struct nfs_server *nfs_create_server( - const struct nfs_parsed_mount_data *, - struct nfs_fh *); +extern struct nfs_server *nfs_create_server(struct nfs_mount_info *, + struct nfs_subversion *); extern struct nfs_server *nfs4_create_server( - const struct nfs_parsed_mount_data *, - struct nfs_fh *); + struct nfs_mount_info *, + struct nfs_subversion *); extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); extern void nfs_free_server(struct nfs_server *server); @@ -224,6 +223,13 @@ static inline void nfs_fs_proc_exit(void) int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *); #endif +/* nfs3client.c */ +#if IS_ENABLED(CONFIG_NFS_V3) +struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); +struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, + struct nfs_fattr *, rpc_authflavor_t); +#endif + /* callback_xdr.c */ extern struct svc_version nfs4_callback_version1; extern struct svc_version nfs4_callback_version4; @@ -256,7 +262,7 @@ extern int nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, int); /* nfs4xdr.c */ -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern int nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, int); #endif @@ -266,7 +272,7 @@ extern const u32 nfs41_maxwrite_overhead; #endif /* nfs4proc.c */ -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern struct rpc_procinfo nfs4_procedures[]; #endif @@ -313,24 +319,26 @@ extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); extern int nfs_write_inode(struct inode *, struct writeback_control *); +extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); -#ifdef CONFIG_NFS_V4 -extern void nfs4_evict_inode(struct inode *); -#endif void nfs_zap_acl_cache(struct inode *inode); extern int nfs_wait_bit_killable(void *word); /* super.c */ +extern const struct super_operations nfs_sops; +extern struct file_system_type nfs_fs_type; extern struct file_system_type nfs_xdev_fs_type; -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern struct file_system_type nfs4_xdev_fs_type; extern struct file_system_type nfs4_referral_fs_type; #endif +struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, + struct nfs_subversion *); void nfs_initialise_sb(struct super_block *); int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); -struct dentry *nfs_fs_mount_common(struct file_system_type *, struct nfs_server *, - int, const char *, struct nfs_mount_info *); +struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *, + struct nfs_mount_info *, struct nfs_subversion *); struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, const char *, struct nfs_mount_info *); @@ -356,7 +364,7 @@ struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *, /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, const char *); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, const char *); @@ -546,13 +554,14 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) static inline unsigned int nfs_page_length(struct page *page) { - loff_t i_size = i_size_read(page->mapping->host); + loff_t i_size = i_size_read(page_file_mapping(page)->host); if (i_size > 0) { + pgoff_t page_index = page_file_index(page); pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; - if (page->index < end_index) + if (page_index < end_index) return PAGE_CACHE_SIZE; - if (page->index == end_index) + if (page_index == end_index) return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1; } return 0; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 08b9c93675d..655925373b9 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -7,6 +7,7 @@ * NFS namespace */ +#include <linux/module.h> #include <linux/dcache.h> #include <linux/gfp.h> #include <linux/mount.h> @@ -112,6 +113,7 @@ Elong_unlock: Elong: return ERR_PTR(-ENAMETOOLONG); } +EXPORT_SYMBOL_GPL(nfs_path); /* * nfs_d_automount - Handle crossing a mountpoint on the server @@ -195,20 +197,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, const char *devname, struct nfs_clone_mount *mountdata) { -#ifdef CONFIG_NFS_V4 - struct vfsmount *mnt = ERR_PTR(-EINVAL); - switch (server->nfs_client->rpc_ops->version) { - case 2: - case 3: - mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); - break; - case 4: - mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata); - } - return mnt; -#else return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); -#endif } /** @@ -253,6 +242,7 @@ out: dprintk("<-- nfs_do_submount() = %p\n", mnt); return mnt; } +EXPORT_SYMBOL_GPL(nfs_do_submount); struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr) @@ -268,3 +258,4 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor); } +EXPORT_SYMBOL_GPL(nfs_submount); diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 8a6394edb8b..0539de1b8d1 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -20,7 +20,7 @@ struct nfs_net { wait_queue_head_t bl_wq; struct list_head nfs_client_list; struct list_head nfs_volume_list; -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) struct idr cb_ident_idr; /* Protected by nfs_client_lock */ #endif spinlock_t nfs_client_lock; diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h new file mode 100644 index 00000000000..43679df56cd --- /dev/null +++ b/fs/nfs/nfs.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2012 Netapp, Inc. All rights reserved. + * + * Function and structures exported by the NFS module + * for use by NFS version-specific modules. + */ +#ifndef __LINUX_INTERNAL_NFS_H +#define __LINUX_INTERNAL_NFS_H + +#include <linux/fs.h> +#include <linux/sunrpc/sched.h> +#include <linux/nfs_xdr.h> + +struct nfs_subversion { + struct module *owner; /* THIS_MODULE pointer */ + struct file_system_type *nfs_fs; /* NFS filesystem type */ + const struct rpc_version *rpc_vers; /* NFS version information */ + const struct nfs_rpc_ops *rpc_ops; /* NFS operations */ + const struct super_operations *sops; /* NFS Super operations */ + const struct xattr_handler **xattr; /* NFS xattr handlers */ + struct list_head list; /* List of NFS versions */ +}; + +struct nfs_subversion *get_nfs_version(unsigned int); +void put_nfs_version(struct nfs_subversion *); +void register_nfs_version(struct nfs_subversion *); +void unregister_nfs_version(struct nfs_subversion *); + +#endif /* __LINUX_INTERNAL_NFS_H */ diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c new file mode 100644 index 00000000000..0a9782c9171 --- /dev/null +++ b/fs/nfs/nfs2super.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012 Netapp, Inc. All rights reserved. + */ +#include <linux/module.h> +#include <linux/nfs_fs.h> +#include "internal.h" +#include "nfs.h" + +static struct nfs_subversion nfs_v2 = { + .owner = THIS_MODULE, + .nfs_fs = &nfs_fs_type, + .rpc_vers = &nfs_version2, + .rpc_ops = &nfs_v2_clientops, + .sops = &nfs_sops, +}; + +static int __init init_nfs_v2(void) +{ + register_nfs_version(&nfs_v2); + return 0; +} + +static void __exit exit_nfs_v2(void) +{ + unregister_nfs_version(&nfs_v2); +} + +MODULE_LICENSE("GPL"); + +module_init(init_nfs_v2); +module_exit(exit_nfs_v2); diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c new file mode 100644 index 00000000000..b3fc65ef39c --- /dev/null +++ b/fs/nfs/nfs3client.c @@ -0,0 +1,65 @@ +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include "internal.h" + +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static const struct rpc_version *nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +const struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = ARRAY_SIZE(nfsacl_version), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; + +/* + * Initialise an NFSv3 ACL client connection + */ +static void nfs_init_server_aclclient(struct nfs_server *server) +{ + if (server->flags & NFS_MOUNT_NOACL) + goto out_noacl; + + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + if (IS_ERR(server->client_acl)) + goto out_noacl; + + /* No errors! Assume that Sun nfsacls are supported */ + server->caps |= NFS_CAP_ACLS; + return; + +out_noacl: + server->caps &= ~NFS_CAP_ACLS; +} +#else +static inline void nfs_init_server_aclclient(struct nfs_server *server) +{ + server->flags &= ~NFS_MOUNT_NOACL; + server->caps &= ~NFS_CAP_ACLS; +} +#endif + +struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) +{ + struct nfs_server *server = nfs_create_server(mount_info, nfs_mod); + /* Create a client RPC handle for the NFS v3 ACL management interface */ + if (!IS_ERR(server)) + nfs_init_server_aclclient(server); + return server; +} + +struct nfs_server *nfs3_clone_server(struct nfs_server *source, + struct nfs_fh *fh, + struct nfs_fattr *fattr, + rpc_authflavor_t flavor) +{ + struct nfs_server *server = nfs_clone_server(source, fh, fattr, flavor); + if (!IS_ERR(server) && !IS_ERR(source->client_acl)) + nfs_init_server_aclclient(server); + return server; +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 65d23eb92fe..0952c791df3 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -925,6 +925,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .file_ops = &nfs_file_operations, .getroot = nfs3_proc_get_root, .submount = nfs_submount, + .try_mount = nfs_try_mount, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, .lookup = nfs3_proc_lookup, @@ -968,4 +969,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, + .create_server = nfs3_create_server, + .clone_server = nfs3_clone_server, }; diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c new file mode 100644 index 00000000000..cc471c72523 --- /dev/null +++ b/fs/nfs/nfs3super.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012 Netapp, Inc. All rights reserved. + */ +#include <linux/module.h> +#include <linux/nfs_fs.h> +#include "internal.h" +#include "nfs.h" + +static struct nfs_subversion nfs_v3 = { + .owner = THIS_MODULE, + .nfs_fs = &nfs_fs_type, + .rpc_vers = &nfs_version3, + .rpc_ops = &nfs_v3_clientops, + .sops = &nfs_sops, +}; + +static int __init init_nfs_v3(void) +{ + register_nfs_version(&nfs_v3); + return 0; +} + +static void __exit exit_nfs_v3(void) +{ + unregister_nfs_version(&nfs_v3); +} + +MODULE_LICENSE("GPL"); + +module_init(init_nfs_v3); +module_exit(exit_nfs_v3); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5511690de8a..3b950dd81e8 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -9,7 +9,7 @@ #ifndef __LINUX_FS_NFS_NFS4_FS_H #define __LINUX_FS_NFS_NFS4_FS_H -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) struct idmap; @@ -205,9 +205,6 @@ extern const struct dentry_operations nfs4_dentry_operations; int nfs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t, int *); -/* write.c */ -int nfs4_write_inode(struct inode *, struct writeback_control *); - /* nfs4namespace.c */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); @@ -366,9 +363,11 @@ extern const nfs4_stateid zero_stateid; /* nfs4super.c */ struct nfs_mount_info; -struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *); -int init_nfs_v4(void); -void exit_nfs_v4(void); +extern struct nfs_subversion nfs_v4; +struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); +extern bool nfs4_disable_idmapping; +extern unsigned short max_session_slots; +extern unsigned short send_implementation_id; /* nfs4sysctl.c */ #ifdef CONFIG_SYSCTL diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 1c3f13c8e47..cbcdfaf3250 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -18,11 +18,6 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT /* - * Turn off NFSv4 uid/gid mapping when using AUTH_SYS - */ -static bool nfs4_disable_idmapping = true; - -/* * Get a unique NFSv4.0 callback identifier which will be used * by the V4.0 callback service to lookup the nfs_client struct */ @@ -357,7 +352,7 @@ static int nfs4_set_client(struct nfs_server *server, .hostname = hostname, .addr = addr, .addrlen = addrlen, - .rpc_ops = &nfs_v4_clientops, + .nfs_mod = &nfs_v4, .proto = proto, .minorversion = minorversion, .net = net, @@ -411,7 +406,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, struct nfs_client_initdata cl_init = { .addr = ds_addr, .addrlen = ds_addrlen, - .rpc_ops = &nfs_v4_clientops, + .nfs_mod = &nfs_v4, .proto = ds_proto, .minorversion = mds_clp->cl_minorversion, .net = mds_clp->cl_net, @@ -574,8 +569,10 @@ error: * Create a version 4 volume record * - keyed on server and FSID */ -struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh) +/*struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, + struct nfs_fh *mntfh)*/ +struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { struct nfs_server *server; int error; @@ -587,11 +584,11 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, return ERR_PTR(-ENOMEM); /* set up the general RPC client */ - error = nfs4_init_server(server, data); + error = nfs4_init_server(server, mount_info->parsed); if (error < 0) goto error; - error = nfs4_server_common_setup(server, mntfh); + error = nfs4_server_common_setup(server, mount_info->mntfh); if (error < 0) goto error; @@ -657,7 +654,3 @@ error: dprintk("<-- nfs4_create_referral_server() = error %d\n", error); return ERR_PTR(error); } - -module_param(nfs4_disable_idmapping, bool, 0644); -MODULE_PARM_DESC(nfs4_disable_idmapping, - "Turn off NFSv4 idmapping when using 'sec=sys'"); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6843e0a37de..a99a8d94872 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -72,8 +72,6 @@ #define NFS4_MAX_LOOP_ON_RECOVER (10) -static unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; - struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); @@ -6870,6 +6868,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .file_ops = &nfs4_file_operations, .getroot = nfs4_proc_get_root, .submount = nfs4_submount, + .try_mount = nfs4_try_mount, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, .lookup = nfs4_proc_lookup, @@ -6915,6 +6914,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .alloc_client = nfs4_alloc_client, .init_client = nfs4_init_client, .free_client = nfs4_free_client, + .create_server = nfs4_create_server, + .clone_server = nfs_clone_server, }; static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { @@ -6929,10 +6930,6 @@ const struct xattr_handler *nfs4_xattr_handlers[] = { NULL }; -module_param(max_session_slots, ushort, 0644); -MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " - "requests the client will negotiate"); - /* * Local variables: * c-basic-offset: 8 diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 59264fb335c..12a31a9dbcd 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -6,15 +6,18 @@ #include <linux/nfs_idmap.h> #include <linux/nfs4_mount.h> #include <linux/nfs_fs.h> +#include "delegation.h" #include "internal.h" #include "nfs4_fs.h" +#include "pnfs.h" +#include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS +static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc); +static void nfs4_evict_inode(struct inode *inode); static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, @@ -36,14 +39,6 @@ static struct file_system_type nfs4_remote_fs_type = { .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type nfs4_xdev_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_xdev_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - static struct file_system_type nfs4_remote_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -75,21 +70,48 @@ static const struct super_operations nfs4_sops = { .remount_fs = nfs_remount, }; +struct nfs_subversion nfs_v4 = { + .owner = THIS_MODULE, + .nfs_fs = &nfs4_fs_type, + .rpc_vers = &nfs_version4, + .rpc_ops = &nfs_v4_clientops, + .sops = &nfs4_sops, + .xattr = nfs4_xattr_handlers, +}; + +static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + int ret = nfs_write_inode(inode, wbc); + + if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { + int status; + bool sync = true; + + if (wbc->sync_mode == WB_SYNC_NONE) + sync = false; + + status = pnfs_layoutcommit_inode(inode, sync); + if (status < 0) + return status; + } + return ret; +} + /* - * Set up an NFS4 superblock + * Clean out any remaining NFSv4 state that might be left over due + * to open() calls that passed nfs_atomic_lookup, but failed to call + * nfs_open(). */ -static void nfs4_fill_super(struct super_block *sb, - struct nfs_mount_info *mount_info) +static void nfs4_evict_inode(struct inode *inode) { - sb->s_time_gran = 1; - sb->s_op = &nfs4_sops; - /* - * The VFS shouldn't apply the umask to mode bits. We will do - * so ourselves when necessary. - */ - sb->s_flags |= MS_POSIXACL; - sb->s_xattr = nfs4_xattr_handlers; - nfs_initialise_sb(sb); + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); + pnfs_return_layout(inode); + pnfs_destroy_layout(NFS_I(inode)); + /* If we are holding a delegation, return it! */ + nfs_inode_return_delegation_noreclaim(inode); + /* First call standard NFS clear_inode() code */ + nfs_clear_inode(inode); } /* @@ -103,17 +125,16 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); - mount_info->fill_super = nfs4_fill_super; mount_info->set_security = nfs_set_sb_security; /* Get a volume representation */ - server = nfs4_create_server(mount_info->parsed, mount_info->mntfh); + server = nfs4_create_server(mount_info, &nfs_v4); if (IS_ERR(server)) { mntroot = ERR_CAST(server); goto out; } - mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); + mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4); out: return mntroot; @@ -228,7 +249,8 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, } struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info) + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { char *export_path; struct vfsmount *root_mnt; @@ -237,8 +259,6 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, dfprintk(MOUNT, "--> nfs4_try_mount()\n"); - mount_info->fill_super = nfs4_fill_super; - export_path = data->nfs_server.export_path; data->nfs_server.export_path = "/"; root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, @@ -253,27 +273,12 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, return res; } -/* - * Clone an NFS4 server record on xdev traversal (FSID-change) - */ -static struct dentry * -nfs4_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs_clone_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info); -} - static struct dentry * nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { struct nfs_mount_info mount_info = { - .fill_super = nfs4_fill_super, + .fill_super = nfs_fill_super, .set_security = nfs_clone_sb_security, .cloned = raw_data, }; @@ -293,7 +298,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, goto out; } - mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info); + mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4); out: nfs_free_fhandle(mount_info.mntfh); return mntroot; @@ -327,7 +332,7 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, } -int __init init_nfs_v4(void) +static int __init init_nfs_v4(void) { int err; @@ -343,6 +348,7 @@ int __init init_nfs_v4(void) if (err < 0) goto out2; + register_nfs_version(&nfs_v4); return 0; out2: nfs4_unregister_sysctl(); @@ -352,9 +358,15 @@ out: return err; } -void exit_nfs_v4(void) +static void __exit exit_nfs_v4(void) { + unregister_nfs_version(&nfs_v4); unregister_filesystem(&nfs4_fs_type); nfs4_unregister_sysctl(); nfs_idmap_quit(); } + +MODULE_LICENSE("GPL"); + +module_init(init_nfs_v4); +module_exit(exit_nfs_v4); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6cbd602e26d..ca13483edd6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -852,12 +852,6 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH + XDR_UNIT); #endif /* CONFIG_NFS_V4_1 */ -static unsigned short send_implementation_id = 1; - -module_param(send_implementation_id, ushort, 0644); -MODULE_PARM_DESC(send_implementation_id, - "Send implementation ID with NFSv4.1 exchange_id"); - static const umode_t nfs_type2fmt[] = { [NF4BAD] = 0, [NF4REG] = S_IFREG, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index aed913c833f..1a6732ed04a 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -54,6 +54,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, if (hdr->completion_ops->init_hdr) hdr->completion_ops->init_hdr(hdr); } +EXPORT_SYMBOL_GPL(nfs_pgheader_init); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) { @@ -70,7 +71,7 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) static inline struct nfs_page * nfs_page_alloc(void) { - struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL); + struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_NOIO); if (p) INIT_LIST_HEAD(&p->wb_list); return p; @@ -117,7 +118,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, * long write-back delay. This will be adjusted in * update_nfs_request below if the region is not locked. */ req->wb_page = page; - req->wb_index = page->index; + req->wb_index = page_file_index(page); page_cache_get(page); req->wb_offset = offset; req->wb_pgbase = offset; @@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_lseg = NULL; desc->pg_dreq = NULL; } +EXPORT_SYMBOL_GPL(nfs_pageio_init); /** * nfs_can_coalesce_requests - test two requests for compatibility @@ -409,6 +411,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, } while (ret); return ret; } +EXPORT_SYMBOL_GPL(nfs_pageio_add_request); /** * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor @@ -424,6 +427,7 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) break; } } +EXPORT_SYMBOL_GPL(nfs_pageio_complete); /** * nfs_pageio_cond_complete - Conditional I/O completion diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7fbd25afe41..76875bfcf19 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1407,6 +1407,7 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) put_lseg(hdr->lseg); nfs_writehdr_free(hdr); } +EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) @@ -1561,6 +1562,7 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) put_lseg(hdr->lseg); nfs_readhdr_free(hdr); } +EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4d3356af330..50a88c3546e 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -774,6 +774,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .file_ops = &nfs_file_operations, .getroot = nfs_proc_get_root, .submount = nfs_submount, + .try_mount = nfs_try_mount, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, .lookup = nfs_proc_lookup, @@ -816,4 +817,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, + .create_server = nfs_create_server, + .clone_server = nfs_clone_server, }; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6267b873bbc..b6bdb18e892 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -48,6 +48,7 @@ struct nfs_read_header *nfs_readhdr_alloc(void) } return rhdr; } +EXPORT_SYMBOL_GPL(nfs_readhdr_alloc); static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount) @@ -80,6 +81,7 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr) kmem_cache_free(nfs_rdata_cachep, rhdr); } +EXPORT_SYMBOL_GPL(nfs_readhdr_free); void nfs_readdata_release(struct nfs_read_data *rdata) { @@ -96,6 +98,7 @@ void nfs_readdata_release(struct nfs_read_data *rdata) if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); } +EXPORT_SYMBOL_GPL(nfs_readdata_release); static int nfs_return_empty_page(struct page *page) @@ -113,6 +116,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops, NFS_SERVER(inode)->rsize, 0); } +EXPORT_SYMBOL_GPL(nfs_pageio_init_read); void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { @@ -397,6 +401,7 @@ int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, return nfs_pagein_multi(desc, hdr); return nfs_pagein_one(desc, hdr); } +EXPORT_SYMBOL_GPL(nfs_generic_pagein); static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { @@ -522,11 +527,11 @@ static const struct rpc_call_ops nfs_read_common_ops = { int nfs_readpage(struct file *file, struct page *page) { struct nfs_open_context *ctx; - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int error; dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", - page, PAGE_CACHE_SIZE, page->index); + page, PAGE_CACHE_SIZE, page_file_index(page)); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); nfs_add_stats(inode, NFSIOS_READPAGES, 1); @@ -580,7 +585,7 @@ static int readpage_async_filler(void *data, struct page *page) { struct nfs_readdesc *desc = (struct nfs_readdesc *)data; - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *new; unsigned int len; int error; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 95866a8c21b..ac6a3c55dce 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -64,11 +64,12 @@ #include "internal.h" #include "fscache.h" #include "pnfs.h" +#include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_TEXT_DATA 1 -#ifdef CONFIG_NFS_V3 +#if IS_ENABLED(CONFIG_NFS_V3) #define NFS_DEFAULT_VERSION 3 #else #define NFS_DEFAULT_VERSION 2 @@ -281,13 +282,14 @@ static match_table_t nfs_vers_tokens = { static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static struct file_system_type nfs_fs_type = { +struct file_system_type nfs_fs_type = { .owner = THIS_MODULE, .name = "nfs", .mount = nfs_fs_mount, .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +EXPORT_SYMBOL_GPL(nfs_fs_type); struct file_system_type nfs_xdev_fs_type = { .owner = THIS_MODULE, @@ -297,7 +299,7 @@ struct file_system_type nfs_xdev_fs_type = { .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -static const struct super_operations nfs_sops = { +const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, @@ -311,8 +313,9 @@ static const struct super_operations nfs_sops = { .show_stats = nfs_show_stats, .remount_fs = nfs_remount, }; +EXPORT_SYMBOL_GPL(nfs_sops); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); static int nfs4_validate_mount_data(void *options, struct nfs_parsed_mount_data *args, const char *dev_name); @@ -363,6 +366,7 @@ void nfs_sb_active(struct super_block *sb) if (atomic_inc_return(&server->active) == 1) atomic_inc(&sb->s_active); } +EXPORT_SYMBOL_GPL(nfs_sb_active); void nfs_sb_deactive(struct super_block *sb) { @@ -371,6 +375,7 @@ void nfs_sb_deactive(struct super_block *sb) if (atomic_dec_and_test(&server->active)) deactivate_super(sb); } +EXPORT_SYMBOL_GPL(nfs_sb_deactive); /* * Deliver file system statistics to userspace @@ -436,6 +441,7 @@ int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) dprintk("%s: statfs error = %d\n", __func__, -error); return error; } +EXPORT_SYMBOL_GPL(nfs_statfs); /* * Map the security flavour number to a name @@ -541,7 +547,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, nfs_show_mountd_netid(m, nfss, showdefaults); } -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { @@ -672,8 +678,9 @@ int nfs_show_options(struct seq_file *m, struct dentry *root) return 0; } +EXPORT_SYMBOL_GPL(nfs_show_options); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) #ifdef CONFIG_NFS_V4_1 static void show_sessions(struct seq_file *m, struct nfs_server *server) { @@ -706,7 +713,7 @@ static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) } } #else -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void show_pnfs(struct seq_file *m, struct nfs_server *server) { } @@ -731,12 +738,14 @@ int nfs_show_devname(struct seq_file *m, struct dentry *root) free_page((unsigned long)page); return err; } +EXPORT_SYMBOL_GPL(nfs_show_devname); int nfs_show_path(struct seq_file *m, struct dentry *dentry) { seq_puts(m, "/"); return 0; } +EXPORT_SYMBOL_GPL(nfs_show_path); /* * Present statistical information for this VFS mountpoint @@ -771,7 +780,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) seq_printf(m, ",bsize=%u", nfss->bsize); seq_printf(m, ",namlen=%u", nfss->namelen); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) if (nfss->nfs_client->rpc_ops->version == 4) { seq_printf(m, "\n\tnfsv4:\t"); seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); @@ -829,6 +838,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) return 0; } +EXPORT_SYMBOL_GPL(nfs_show_stats); /* * Begin unmount by attempting to remove all automounted mountpoints we added @@ -848,6 +858,7 @@ void nfs_umount_begin(struct super_block *sb) if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); } +EXPORT_SYMBOL_GPL(nfs_umount_begin); static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) { @@ -1649,8 +1660,9 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, return nfs_walk_authlist(args, &request); } -static struct dentry *nfs_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info) +struct dentry *nfs_try_mount(int flags, const char *dev_name, + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { int status; struct nfs_server *server; @@ -1662,12 +1674,13 @@ static struct dentry *nfs_try_mount(int flags, const char *dev_name, } /* Get a volume representation */ - server = nfs_create_server(mount_info->parsed, mount_info->mntfh); + server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); if (IS_ERR(server)) return ERR_CAST(server); - return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mount_info); + return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); } +EXPORT_SYMBOL_GPL(nfs_try_mount); /* * Split "dev_name" into "hostname:export_path". @@ -1871,7 +1884,7 @@ static int nfs23_validate_mount_data(void *options, return NFS_TEXT_DATA; } -#ifndef CONFIG_NFS_V3 +#if !IS_ENABLED(CONFIG_NFS_V3) if (args->version == 3) goto out_v3_not_compiled; #endif /* !CONFIG_NFS_V3 */ @@ -1891,7 +1904,7 @@ out_no_sec: dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); return -EINVAL; -#ifndef CONFIG_NFS_V3 +#if !IS_ENABLED(CONFIG_NFS_V3) out_v3_not_compiled: dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n"); return -EPROTONOSUPPORT; @@ -1910,7 +1923,7 @@ out_invalid_fh: return -EINVAL; } -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static int nfs_validate_mount_data(struct file_system_type *fs_type, void *options, struct nfs_parsed_mount_data *args, @@ -1948,7 +1961,7 @@ static int nfs_validate_text_mount_data(void *options, goto out_no_address; if (args->version == 4) { -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) port = NFS_PORT; max_namelen = NFS4_MAXNAMLEN; max_pathlen = NFS4_MAXPATHLEN; @@ -1971,7 +1984,7 @@ static int nfs_validate_text_mount_data(void *options, &args->nfs_server.export_path, max_pathlen); -#ifndef CONFIG_NFS_V4 +#if !IS_ENABLED(CONFIG_NFS_V4) out_v4_not_compiled: dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); return -EPROTONOSUPPORT; @@ -2070,6 +2083,7 @@ out: kfree(data); return error; } +EXPORT_SYMBOL_GPL(nfs_remount); /* * Initialise the common bits of the superblock @@ -2103,10 +2117,12 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) sb->s_blocksize_bits = 0; sb->s_blocksize = 0; - if (data->bsize) + sb->s_xattr = server->nfs_client->cl_nfs_mod->xattr; + sb->s_op = server->nfs_client->cl_nfs_mod->sops; + if (data && data->bsize) sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - if (server->nfs_client->rpc_ops->version == 3) { + if (server->nfs_client->rpc_ops->version != 2) { /* The VFS shouldn't apply the umask to mode bits. We will do * so ourselves when necessary. */ @@ -2114,9 +2130,9 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) sb->s_time_gran = 1; } - sb->s_op = &nfs_sops; nfs_initialise_sb(sb); } +EXPORT_SYMBOL_GPL(nfs_fill_super); /* * Finish setting up a cloned NFS2/3/4 superblock @@ -2286,6 +2302,7 @@ int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, { return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts); } +EXPORT_SYMBOL_GPL(nfs_set_sb_security); int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, struct nfs_mount_info *mount_info) @@ -2296,11 +2313,12 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, return -ESTALE; return 0; } +EXPORT_SYMBOL_GPL(nfs_clone_sb_security); -struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, - struct nfs_server *server, +struct dentry *nfs_fs_mount_common(struct nfs_server *server, int flags, const char *dev_name, - struct nfs_mount_info *mount_info) + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { struct super_block *s; struct dentry *mntroot = ERR_PTR(-ENOMEM); @@ -2319,7 +2337,7 @@ struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, sb_mntdata.mntflags |= MS_SYNCHRONOUS; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, compare_super, nfs_set_super, flags, &sb_mntdata); + s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); if (IS_ERR(s)) { mntroot = ERR_CAST(s); goto out_err_nosb; @@ -2369,6 +2387,7 @@ error_splat_bdi: deactivate_locked_super(s); goto out; } +EXPORT_SYMBOL_GPL(nfs_fs_mount_common); struct dentry *nfs_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) @@ -2378,6 +2397,7 @@ struct dentry *nfs_fs_mount(struct file_system_type *fs_type, .set_security = nfs_set_sb_security, }; struct dentry *mntroot = ERR_PTR(-ENOMEM); + struct nfs_subversion *nfs_mod; int error; mount_info.parsed = nfs_alloc_parsed_mount_data(); @@ -2394,18 +2414,21 @@ struct dentry *nfs_fs_mount(struct file_system_type *fs_type, goto out; } -#ifdef CONFIG_NFS_V4 - if (mount_info.parsed->version == 4) - mntroot = nfs4_try_mount(flags, dev_name, &mount_info); - else -#endif /* CONFIG_NFS_V4 */ - mntroot = nfs_try_mount(flags, dev_name, &mount_info); + nfs_mod = get_nfs_version(mount_info.parsed->version); + if (IS_ERR(nfs_mod)) { + mntroot = ERR_CAST(nfs_mod); + goto out; + } + mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info, nfs_mod); + + put_nfs_version(nfs_mod); out: nfs_free_parsed_mount_data(mount_info.parsed); nfs_free_fhandle(mount_info.mntfh); return mntroot; } +EXPORT_SYMBOL_GPL(nfs_fs_mount); /* * Ensure that we unregister the bdi before kill_anon_super @@ -2417,6 +2440,7 @@ void nfs_put_super(struct super_block *s) bdi_unregister(&server->backing_dev_info); } +EXPORT_SYMBOL_GPL(nfs_put_super); /* * Destroy an NFS2/3 superblock @@ -2429,31 +2453,38 @@ void nfs_kill_super(struct super_block *s) nfs_fscache_release_super_cookie(s); nfs_free_server(server); } +EXPORT_SYMBOL_GPL(nfs_kill_super); /* * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) */ struct dentry * -nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, - const char *dev_name, struct nfs_mount_info *mount_info) +nfs_xdev_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) { - struct nfs_clone_mount *data = mount_info->cloned; + struct nfs_clone_mount *data = raw_data; + struct nfs_mount_info mount_info = { + .fill_super = nfs_clone_super, + .set_security = nfs_clone_sb_security, + .cloned = data, + }; struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); + struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; int error; dprintk("--> nfs_xdev_mount_common()\n"); - mount_info->mntfh = data->fh; + mount_info.mntfh = mount_info.cloned->fh; /* create a new volume representation */ - server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); + server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); if (IS_ERR(server)) { error = PTR_ERR(server); goto out_err; } - mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); + mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod); dprintk("<-- nfs_xdev_mount_common() = 0\n"); out: return mntroot; @@ -2463,22 +2494,7 @@ out_err: goto out; } -/* - * Clone an NFS2/3 server record on xdev traversal (FSID-change) - */ -static struct dentry * -nfs_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs_clone_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info); -} - -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) { @@ -2577,4 +2593,56 @@ out_no_address: return -EINVAL; } +/* + * NFS v4 module parameters need to stay in the + * NFS client for backwards compatibility + */ +unsigned int nfs_callback_set_tcpport; +unsigned short nfs_callback_tcpport; +/* Default cache timeout is 10 minutes */ +unsigned int nfs_idmap_cache_timeout = 600; +/* Turn off NFSv4 uid/gid mapping when using AUTH_SYS */ +bool nfs4_disable_idmapping = true; +unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; +unsigned short send_implementation_id = 1; + +EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); +EXPORT_SYMBOL_GPL(nfs_callback_tcpport); +EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout); +EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); +EXPORT_SYMBOL_GPL(max_session_slots); +EXPORT_SYMBOL_GPL(send_implementation_id); + +#define NFS_CALLBACK_MAXPORTNR (65535U) + +static int param_set_portnr(const char *val, const struct kernel_param *kp) +{ + unsigned long num; + int ret; + + if (!val) + return -EINVAL; + ret = strict_strtoul(val, 0, &num); + if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) + return -EINVAL; + *((unsigned int *)kp->arg) = num; + return 0; +} +static struct kernel_param_ops param_ops_portnr = { + .set = param_set_portnr, + .get = param_get_uint, +}; +#define param_check_portnr(name, p) __param_check(name, p, unsigned int); + +module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); +module_param(nfs_idmap_cache_timeout, int, 0644); +module_param(nfs4_disable_idmapping, bool, 0644); +MODULE_PARM_DESC(nfs4_disable_idmapping, + "Turn off NFSv4 idmapping when using 'sec=sys'"); +module_param(max_session_slots, ushort, 0644); +MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " + "requests the client will negotiate"); +module_param(send_implementation_id, ushort, 0644); +MODULE_PARM_DESC(send_implementation_id, + "Send implementation ID with NFSv4.1 exchange_id"); #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f312860c15d..5829d0ce7cf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -52,7 +52,7 @@ static mempool_t *nfs_commit_mempool; struct nfs_commit_data *nfs_commitdata_alloc(void) { - struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); + struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); if (p) { memset(p, 0, sizeof(*p)); @@ -70,7 +70,7 @@ EXPORT_SYMBOL_GPL(nfs_commit_free); struct nfs_write_header *nfs_writehdr_alloc(void) { - struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); + struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); if (p) { struct nfs_pgio_header *hdr = &p->header; @@ -84,6 +84,7 @@ struct nfs_write_header *nfs_writehdr_alloc(void) } return p; } +EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount) @@ -115,6 +116,7 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr) struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); mempool_free(whdr, nfs_wdata_mempool); } +EXPORT_SYMBOL_GPL(nfs_writehdr_free); void nfs_writedata_release(struct nfs_write_data *wdata) { @@ -131,6 +133,7 @@ void nfs_writedata_release(struct nfs_write_data *wdata) if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); } +EXPORT_SYMBOL_GPL(nfs_writedata_release); static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) { @@ -139,25 +142,38 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); } -static struct nfs_page *nfs_page_find_request_locked(struct page *page) +static struct nfs_page * +nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) { struct nfs_page *req = NULL; - if (PagePrivate(page)) { + if (PagePrivate(page)) req = (struct nfs_page *)page_private(page); - if (req != NULL) - kref_get(&req->wb_kref); + else if (unlikely(PageSwapCache(page))) { + struct nfs_page *freq, *t; + + /* Linearly search the commit list for the correct req */ + list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) { + if (freq->wb_page == page) { + req = freq; + break; + } + } } + + if (req) + kref_get(&req->wb_kref); + return req; } static struct nfs_page *nfs_page_find_request(struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req = NULL; spin_lock(&inode->i_lock); - req = nfs_page_find_request_locked(page); + req = nfs_page_find_request_locked(NFS_I(inode), page); spin_unlock(&inode->i_lock); return req; } @@ -165,16 +181,16 @@ static struct nfs_page *nfs_page_find_request(struct page *page) /* Adjust the file length if we're writing beyond the end */ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; loff_t end, i_size; pgoff_t end_index; spin_lock(&inode->i_lock); i_size = i_size_read(inode); end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; - if (i_size > 0 && page->index < end_index) + if (i_size > 0 && page_file_index(page) < end_index) goto out; - end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); + end = page_file_offset(page) + ((loff_t)offset+count); if (i_size >= end) goto out; i_size_write(inode, end); @@ -187,7 +203,7 @@ out: static void nfs_set_pageerror(struct page *page) { SetPageError(page); - nfs_zap_mapping(page->mapping->host, page->mapping); + nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); } /* We can set the PG_uptodate flag if we see that a write request @@ -228,7 +244,7 @@ static int nfs_set_page_writeback(struct page *page) int ret = test_set_page_writeback(page); if (!ret) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_server *nfss = NFS_SERVER(inode); if (atomic_long_inc_return(&nfss->writeback) > @@ -242,7 +258,7 @@ static int nfs_set_page_writeback(struct page *page) static void nfs_end_page_writeback(struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_server *nfss = NFS_SERVER(inode); end_page_writeback(page); @@ -252,13 +268,13 @@ static void nfs_end_page_writeback(struct page *page) static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req; int ret; spin_lock(&inode->i_lock); for (;;) { - req = nfs_page_find_request_locked(page); + req = nfs_page_find_request_locked(NFS_I(inode), page); if (req == NULL) break; if (nfs_lock_request(req)) @@ -313,13 +329,13 @@ out: static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int ret; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); - nfs_pageio_cond_complete(pgio, page->index); + nfs_pageio_cond_complete(pgio, page_file_index(page)); ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); if (ret == -EAGAIN) { redirty_page_for_writepage(wbc, page); @@ -336,7 +352,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc struct nfs_pageio_descriptor pgio; int err; - NFS_PROTO(page->mapping->host)->write_pageio_init(&pgio, + NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio, page->mapping->host, wb_priority(wbc), &nfs_async_write_completion_ops); @@ -413,9 +429,15 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) spin_lock(&inode->i_lock); if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; - set_bit(PG_MAPPED, &req->wb_flags); - SetPagePrivate(req->wb_page); - set_page_private(req->wb_page, (unsigned long)req); + /* + * Swap-space should not get truncated. Hence no need to plug the race + * with invalidate/truncate. + */ + if (likely(!PageSwapCache(req->wb_page))) { + set_bit(PG_MAPPED, &req->wb_flags); + SetPagePrivate(req->wb_page); + set_page_private(req->wb_page, (unsigned long)req); + } nfsi->npages++; kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); @@ -432,9 +454,11 @@ static void nfs_inode_remove_request(struct nfs_page *req) BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&inode->i_lock); - set_page_private(req->wb_page, 0); - ClearPagePrivate(req->wb_page); - clear_bit(PG_MAPPED, &req->wb_flags); + if (likely(!PageSwapCache(req->wb_page))) { + set_page_private(req->wb_page, 0); + ClearPagePrivate(req->wb_page); + clear_bit(PG_MAPPED, &req->wb_flags); + } nfsi->npages--; spin_unlock(&inode->i_lock); nfs_release_request(req); @@ -446,7 +470,7 @@ nfs_mark_request_dirty(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) /** * nfs_request_add_commit_list - add request to a commit list * @req: pointer to a struct nfs_page @@ -471,7 +495,7 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, spin_unlock(cinfo->lock); if (!cinfo->dreq) { inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(req->wb_page->mapping->backing_dev_info, + inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, BDI_RECLAIMABLE); __mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC); @@ -538,7 +562,7 @@ static void nfs_clear_page_commit(struct page *page) { dec_zone_page_state(page, NR_UNSTABLE_NFS); - dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); + dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); } static void @@ -636,7 +660,7 @@ out: hdr->release(hdr); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { @@ -730,7 +754,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); for (;;) { - req = nfs_page_find_request_locked(page); + req = nfs_page_find_request_locked(NFS_I(inode), page); if (req == NULL) goto out_unlock; @@ -789,7 +813,7 @@ out_err: static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, struct page *page, unsigned int offset, unsigned int bytes) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req; req = nfs_try_to_update_request(inode, page, offset, bytes); @@ -842,7 +866,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) nfs_release_request(req); if (!do_flush) return 0; - status = nfs_wb_page(page->mapping->host, page); + status = nfs_wb_page(page_file_mapping(page)->host, page); } while (status == 0); return status; } @@ -872,7 +896,7 @@ int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int status = 0; nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); @@ -880,7 +904,7 @@ int nfs_updatepage(struct file *file, struct page *page, dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, count, - (long long)(page_offset(page) + offset)); + (long long)(page_file_offset(page) + offset)); /* If we're not using byte range locks, and we know the page * is up to date, it may be more efficient to extend the write @@ -1173,6 +1197,7 @@ int nfs_generic_flush(struct nfs_pageio_descriptor *desc, return nfs_flush_multi(desc, hdr); return nfs_flush_one(desc, hdr); } +EXPORT_SYMBOL_GPL(nfs_generic_flush); static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { @@ -1210,6 +1235,7 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, NFS_SERVER(inode)->wsize, ioflags); } +EXPORT_SYMBOL_GPL(nfs_pageio_init_write); void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { @@ -1297,7 +1323,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) return; nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage even though we @@ -1357,7 +1383,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) { int ret; @@ -1469,7 +1495,7 @@ void nfs_retry_commit(struct list_head *page_list, nfs_mark_request_commit(req, lseg, cinfo); if (!cinfo->dreq) { dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(req->wb_page->mapping->backing_dev_info, + dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, BDI_RECLAIMABLE); } nfs_unlock_and_release_request(req); @@ -1673,26 +1699,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { return nfs_commit_unstable_pages(inode, wbc); } - -#ifdef CONFIG_NFS_V4 -int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - int ret = nfs_write_inode(inode, wbc); - - if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { - int status; - bool sync = true; - - if (wbc->sync_mode == WB_SYNC_NONE) - sync = false; - - status = pnfs_layoutcommit_inode(inode, sync); - if (status < 0) - return status; - } - return ret; -} -#endif +EXPORT_SYMBOL_GPL(nfs_write_inode); /* * flush the inode to disk. @@ -1708,6 +1715,7 @@ int nfs_wb_all(struct inode *inode) return sync_inode(inode, &wbc); } +EXPORT_SYMBOL_GPL(nfs_wb_all); int nfs_wb_page_cancel(struct inode *inode, struct page *page) { @@ -1744,7 +1752,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) */ int nfs_wb_page(struct inode *inode, struct page *page) { - loff_t range_start = page_offset(page); + loff_t range_start = page_file_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 5ff0b7b9fc0..43295d45cc2 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -154,6 +154,10 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) if (status < 0) return; + status = mnt_want_write_file(rec_file); + if (status) + return; + dir = rec_file->f_path.dentry; /* lock the parent */ mutex_lock(&dir->d_inode->i_mutex); @@ -173,11 +177,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) * as well be forgiving and just succeed silently. */ goto out_put; - status = mnt_want_write_file(rec_file); - if (status) - goto out_put; status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU); - mnt_drop_write_file(rec_file); out_put: dput(dentry); out_unlock: @@ -189,6 +189,7 @@ out_unlock: " (err %d); please check that %s exists" " and is writeable", status, user_recovery_dirname); + mnt_drop_write_file(rec_file); nfs4_reset_creds(original_cred); } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index cc793005a87..032af381b3a 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -635,6 +635,7 @@ fh_put(struct svc_fh *fhp) fhp->fh_post_saved = 0; #endif } + fh_drop_write(fhp); if (exp) { exp_put(exp); fhp->fh_export = NULL; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index e15dc45fc5e..aad6d457b9e 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -196,6 +196,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, struct dentry *dchild; int type, mode; __be32 nfserr; + int hosterr; dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); dprintk("nfsd: CREATE %s %.*s\n", @@ -214,6 +215,12 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, nfserr = nfserr_exist; if (isdotent(argp->name, argp->len)) goto done; + hosterr = fh_want_write(dirfhp); + if (hosterr) { + nfserr = nfserrno(hosterr); + goto done; + } + fh_lock_nested(dirfhp, I_MUTEX_PARENT); dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); if (IS_ERR(dchild)) { @@ -330,7 +337,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, out_unlock: /* We don't really need to unlock, as fh_put does it. */ fh_unlock(dirfhp); - + fh_drop_write(dirfhp); done: fh_put(dirfhp); return nfsd_return_dirop(nfserr, resp); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 702f64e820c..a9269f142cc 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1284,6 +1284,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, * If it has, the parent directory should already be locked. */ if (!resfhp->fh_dentry) { + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ fh_lock_nested(fhp, I_MUTEX_PARENT); dchild = lookup_one_len(fname, dentry, flen); @@ -1327,14 +1331,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; } - host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; - /* * Get the dir op function pointer. */ err = 0; + host_err = 0; switch (type) { case S_IFREG: host_err = vfs_create(dirp, dchild, iap->ia_mode, true); @@ -1351,10 +1352,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); break; } - if (host_err < 0) { - fh_drop_write(fhp); + if (host_err < 0) goto out_nfserr; - } err = nfsd_create_setattr(rqstp, resfhp, iap); @@ -1366,7 +1365,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err2 = nfserrno(commit_metadata(fhp)); if (err2) err = err2; - fh_drop_write(fhp); /* * Update the file handle to get the new inode info. */ @@ -1425,6 +1423,11 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfserr_notdir; if (!dirp->i_op->lookup) goto out; + + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + fh_lock_nested(fhp, I_MUTEX_PARENT); /* @@ -1457,9 +1460,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, v_atime = verifier[1]&0x7fffffff; } - host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; if (dchild->d_inode) { err = 0; @@ -1530,7 +1530,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!err) err = nfserrno(commit_metadata(fhp)); - fh_drop_write(fhp); /* * Update the filehandle to get the new inode info. */ @@ -1541,6 +1540,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, fh_unlock(fhp); if (dchild && !IS_ERR(dchild)) dput(dchild); + fh_drop_write(fhp); return err; out_nfserr: @@ -1621,6 +1621,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; + + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + fh_lock(fhp); dentry = fhp->fh_dentry; dnew = lookup_one_len(fname, dentry, flen); @@ -1628,10 +1633,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, if (IS_ERR(dnew)) goto out_nfserr; - host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; - if (unlikely(path[plen] != 0)) { char *path_alloced = kmalloc(plen+1, GFP_KERNEL); if (path_alloced == NULL) @@ -1691,6 +1692,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, if (isdotent(name, len)) goto out; + host_err = fh_want_write(tfhp); + if (host_err) { + err = nfserrno(host_err); + goto out; + } + fh_lock_nested(ffhp, I_MUTEX_PARENT); ddir = ffhp->fh_dentry; dirp = ddir->d_inode; @@ -1702,18 +1709,13 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, dold = tfhp->fh_dentry; - host_err = fh_want_write(tfhp); - if (host_err) { - err = nfserrno(host_err); - goto out_dput; - } err = nfserr_noent; if (!dold->d_inode) - goto out_drop_write; + goto out_dput; host_err = nfsd_break_lease(dold->d_inode); if (host_err) { err = nfserrno(host_err); - goto out_drop_write; + goto out_dput; } host_err = vfs_link(dold, dirp, dnew); if (!host_err) { @@ -1726,12 +1728,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, else err = nfserrno(host_err); } -out_drop_write: - fh_drop_write(tfhp); out_dput: dput(dnew); out_unlock: fh_unlock(ffhp); + fh_drop_write(tfhp); out: return err; @@ -1774,6 +1775,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; + host_err = fh_want_write(ffhp); + if (host_err) { + err = nfserrno(host_err); + goto out; + } + /* cannot use fh_lock as we need deadlock protective ordering * so do it by hand */ trap = lock_rename(tdentry, fdentry); @@ -1804,17 +1811,14 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, host_err = -EXDEV; if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) goto out_dput_new; - host_err = fh_want_write(ffhp); - if (host_err) - goto out_dput_new; host_err = nfsd_break_lease(odentry->d_inode); if (host_err) - goto out_drop_write; + goto out_dput_new; if (ndentry->d_inode) { host_err = nfsd_break_lease(ndentry->d_inode); if (host_err) - goto out_drop_write; + goto out_dput_new; } host_err = vfs_rename(fdir, odentry, tdir, ndentry); if (!host_err) { @@ -1822,8 +1826,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (!host_err) host_err = commit_metadata(ffhp); } -out_drop_write: - fh_drop_write(ffhp); out_dput_new: dput(ndentry); out_dput_old: @@ -1839,6 +1841,7 @@ out_drop_write: fill_post_wcc(tfhp); unlock_rename(tdentry, fdentry); ffhp->fh_locked = tfhp->fh_locked = 0; + fh_drop_write(ffhp); out: return err; @@ -1864,6 +1867,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (err) goto out; + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + fh_lock_nested(fhp, I_MUTEX_PARENT); dentry = fhp->fh_dentry; dirp = dentry->d_inode; @@ -1882,21 +1889,15 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (!type) type = rdentry->d_inode->i_mode & S_IFMT; - host_err = fh_want_write(fhp); - if (host_err) - goto out_put; - host_err = nfsd_break_lease(rdentry->d_inode); if (host_err) - goto out_drop_write; + goto out_put; if (type != S_IFDIR) host_err = vfs_unlink(dirp, rdentry); else host_err = vfs_rmdir(dirp, rdentry); if (!host_err) host_err = commit_metadata(fhp); -out_drop_write: - fh_drop_write(fhp); out_put: dput(rdentry); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index ec0611b2b73..359594c393d 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -110,12 +110,19 @@ int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); static inline int fh_want_write(struct svc_fh *fh) { - return mnt_want_write(fh->fh_export->ex_path.mnt); + int ret = mnt_want_write(fh->fh_export->ex_path.mnt); + + if (!ret) + fh->fh_want_write = 1; + return ret; } static inline void fh_drop_write(struct svc_fh *fh) { - mnt_drop_write(fh->fh_export->ex_path.mnt); + if (fh->fh_want_write) { + fh->fh_want_write = 0; + mnt_drop_write(fh->fh_export->ex_path.mnt); + } } #endif /* LINUX_NFSD_VFS_H */ diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 62cebc8e1a1..a4d56ac02e6 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -69,16 +69,18 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_dentry->d_inode; struct nilfs_transaction_info ti; - int ret; + int ret = 0; if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info))) return VM_FAULT_SIGBUS; /* -ENOSPC */ + sb_start_pagefault(inode->i_sb); lock_page(page); if (page->mapping != inode->i_mapping || page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) { unlock_page(page); - return VM_FAULT_NOPAGE; /* make the VM retry the fault */ + ret = -EFAULT; /* make the VM retry the fault */ + goto out; } /* @@ -112,19 +114,21 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = nilfs_transaction_begin(inode->i_sb, &ti, 1); /* never returns -ENOMEM, but may return -ENOSPC */ if (unlikely(ret)) - return VM_FAULT_SIGBUS; + goto out; - ret = block_page_mkwrite(vma, vmf, nilfs_get_block); - if (ret != VM_FAULT_LOCKED) { + ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); + if (ret) { nilfs_transaction_abort(inode->i_sb); - return ret; + goto out; } nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits)); nilfs_transaction_commit(inode->i_sb); mapped: wait_on_page_writeback(page); - return VM_FAULT_LOCKED; + out: + sb_end_pagefault(inode->i_sb); + return block_page_mkwrite_return(ret); } static const struct vm_operations_struct nilfs_file_vm_ops = { diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 0b6387c67e6..fdb18076948 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -660,8 +660,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, goto out_free; } - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); - ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); if (ret < 0) printk(KERN_ERR "NILFS: GC failed during preparation: " diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 88e11fb346b..a5752a58993 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -189,7 +189,7 @@ int nilfs_transaction_begin(struct super_block *sb, if (ret > 0) return 0; - vfs_check_frozen(sb, SB_FREEZE_WRITE); + sb_start_intwrite(sb); nilfs = sb->s_fs_info; down_read(&nilfs->ns_segctor_sem); @@ -205,6 +205,7 @@ int nilfs_transaction_begin(struct super_block *sb, current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); + sb_end_intwrite(sb); return ret; } @@ -246,6 +247,7 @@ int nilfs_transaction_commit(struct super_block *sb) err = nilfs_construct_segment(sb); if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); + sb_end_intwrite(sb); return err; } @@ -264,6 +266,7 @@ void nilfs_transaction_abort(struct super_block *sb) current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); + sb_end_intwrite(sb); } void nilfs_relax_pressure_in_lock(struct super_block *sb) diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 7389d2d5e51..1ecf46448f8 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2084,7 +2084,6 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, if (err) return err; pos = *ppos; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); /* We can write back this queue in page reclaim. */ current->backing_dev_info = mapping->backing_dev_info; written = 0; @@ -2119,6 +2118,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, BUG_ON(iocb->ki_pos != pos); + sb_start_write(inode->i_sb); mutex_lock(&inode->i_mutex); ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); @@ -2127,6 +2127,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0) ret = err; } + sb_end_write(inode->i_sb); return ret; } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 7602783d7f4..46a1f6d7510 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1971,6 +1971,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd, { struct inode *inode = file->f_path.dentry->d_inode; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + int ret; if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && !ocfs2_writes_unwritten_extents(osb)) @@ -1985,7 +1986,12 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd, if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); + ret = mnt_want_write_file(file); + if (ret) + return ret; + ret = __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); + mnt_drop_write_file(file); + return ret; } static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, @@ -2261,7 +2267,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, if (iocb->ki_left == 0) return 0; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + sb_start_write(inode->i_sb); appending = file->f_flags & O_APPEND ? 1 : 0; direct_io = file->f_flags & O_DIRECT ? 1 : 0; @@ -2436,6 +2442,7 @@ out_sems: ocfs2_iocb_clear_sem_locked(iocb); mutex_unlock(&inode->i_mutex); + sb_end_write(inode->i_sb); if (written) ret = written; diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index d96f7f81d8d..f20edcbfe70 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -928,7 +928,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (get_user(new_clusters, (int __user *)arg)) return -EFAULT; - return ocfs2_group_extend(inode, new_clusters); + status = mnt_want_write_file(filp); + if (status) + return status; + status = ocfs2_group_extend(inode, new_clusters); + mnt_drop_write_file(filp); + return status; case OCFS2_IOC_GROUP_ADD: case OCFS2_IOC_GROUP_ADD64: if (!capable(CAP_SYS_RESOURCE)) @@ -937,7 +942,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (copy_from_user(&input, (int __user *) arg, sizeof(input))) return -EFAULT; - return ocfs2_group_add(inode, &input); + status = mnt_want_write_file(filp); + if (status) + return status; + status = ocfs2_group_add(inode, &input); + mnt_drop_write_file(filp); + return status; case OCFS2_IOC_REFLINK: if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 0a42ae96dca..2dd36af79e2 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -355,11 +355,14 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) if (journal_current_handle()) return jbd2_journal_start(journal, max_buffs); + sb_start_intwrite(osb->sb); + down_read(&osb->journal->j_trans_barrier); handle = jbd2_journal_start(journal, max_buffs); if (IS_ERR(handle)) { up_read(&osb->journal->j_trans_barrier); + sb_end_intwrite(osb->sb); mlog_errno(PTR_ERR(handle)); @@ -388,8 +391,10 @@ int ocfs2_commit_trans(struct ocfs2_super *osb, if (ret < 0) mlog_errno(ret); - if (!nested) + if (!nested) { up_read(&journal->j_trans_barrier); + sb_end_intwrite(osb->sb); + } return ret; } diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 9cd41083e99..d150372fd81 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -136,6 +136,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) sigset_t oldset; int ret; + sb_start_pagefault(inode->i_sb); ocfs2_block_signals(&oldset); /* @@ -165,6 +166,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) out: ocfs2_unblock_signals(&oldset); + sb_end_pagefault(inode->i_sb); return ret; } diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 9f32d7cbb7a..30a055049e1 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4466,20 +4466,11 @@ int ocfs2_reflink_ioctl(struct inode *inode, goto out_dput; } - error = mnt_want_write(new_path.mnt); - if (error) { - mlog_errno(error); - goto out_dput; - } - error = ocfs2_vfs_reflink(old_path.dentry, new_path.dentry->d_inode, new_dentry, preserve); - mnt_drop_write(new_path.mnt); out_dput: - dput(new_dentry); - mutex_unlock(&new_path.dentry->d_inode->i_mutex); - path_put(&new_path); + done_path_create(&new_path, new_dentry); out: path_put(&old_path); diff --git a/fs/open.c b/fs/open.c index 1e914b397e1..f3d96e7e7b1 100644 --- a/fs/open.c +++ b/fs/open.c @@ -164,11 +164,13 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) if (IS_APPEND(inode)) goto out_putf; + sb_start_write(inode->i_sb); error = locks_verify_truncate(inode, file, length); if (!error) error = security_path_truncate(&file->f_path); if (!error) error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); + sb_end_write(inode->i_sb); out_putf: fput(file); out: @@ -266,7 +268,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!file->f_op->fallocate) return -EOPNOTSUPP; - return file->f_op->fallocate(file, mode, offset, len); + sb_start_write(inode->i_sb); + ret = file->f_op->fallocate(file, mode, offset, len); + sb_end_write(inode->i_sb); + return ret; } SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) @@ -620,7 +625,7 @@ static inline int __get_file_write_access(struct inode *inode, /* * Balanced in __fput() */ - error = mnt_want_write(mnt); + error = __mnt_want_write(mnt); if (error) put_write_access(inode); } @@ -654,6 +659,7 @@ static int do_dentry_open(struct file *f, if (unlikely(f->f_flags & O_PATH)) f->f_mode = FMODE_PATH; + path_get(&f->f_path); inode = f->f_path.dentry->d_inode; if (f->f_mode & FMODE_WRITE) { error = __get_file_write_access(inode, f->f_path.mnt); @@ -739,9 +745,7 @@ int finish_open(struct file *file, struct dentry *dentry, int error; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ - mntget(file->f_path.mnt); - file->f_path.dentry = dget(dentry); - + file->f_path.dentry = dentry; error = do_dentry_open(file, open, current_cred()); if (!error) *opened |= FILE_OPENED; @@ -784,7 +788,6 @@ struct file *dentry_open(const struct path *path, int flags, f->f_flags = flags; f->f_path = *path; - path_get(&f->f_path); error = do_dentry_open(f, NULL, cred); if (!error) { error = open_check_o_direct(f); diff --git a/fs/pipe.c b/fs/pipe.c index 95cbd6b227e..8d85d7068c1 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1016,18 +1016,16 @@ fail_inode: return NULL; } -struct file *create_write_pipe(int flags) +int create_pipe_files(struct file **res, int flags) { int err; - struct inode *inode; + struct inode *inode = get_pipe_inode(); struct file *f; struct path path; - struct qstr name = { .name = "" }; + static struct qstr name = { .name = "" }; - err = -ENFILE; - inode = get_pipe_inode(); if (!inode) - goto err; + return -ENFILE; err = -ENOMEM; path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); @@ -1041,62 +1039,43 @@ struct file *create_write_pipe(int flags) f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); if (!f) goto err_dentry; - f->f_mapping = inode->i_mapping; f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); - f->f_version = 0; - return f; + res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops); + if (!res[0]) + goto err_file; + + path_get(&path); + res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); + res[1] = f; + return 0; - err_dentry: +err_file: + put_filp(f); +err_dentry: free_pipe_info(inode); path_put(&path); - return ERR_PTR(err); + return err; - err_inode: +err_inode: free_pipe_info(inode); iput(inode); - err: - return ERR_PTR(err); -} - -void free_write_pipe(struct file *f) -{ - free_pipe_info(f->f_dentry->d_inode); - path_put(&f->f_path); - put_filp(f); -} - -struct file *create_read_pipe(struct file *wrf, int flags) -{ - /* Grab pipe from the writer */ - struct file *f = alloc_file(&wrf->f_path, FMODE_READ, - &read_pipefifo_fops); - if (!f) - return ERR_PTR(-ENFILE); - - path_get(&wrf->f_path); - f->f_flags = O_RDONLY | (flags & O_NONBLOCK); - - return f; + return err; } int do_pipe_flags(int *fd, int flags) { - struct file *fw, *fr; + struct file *files[2]; int error; int fdw, fdr; if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT)) return -EINVAL; - fw = create_write_pipe(flags); - if (IS_ERR(fw)) - return PTR_ERR(fw); - fr = create_read_pipe(fw, flags); - error = PTR_ERR(fr); - if (IS_ERR(fr)) - goto err_write_pipe; + error = create_pipe_files(files, flags); + if (error) + return error; error = get_unused_fd_flags(flags); if (error < 0) @@ -1109,8 +1088,8 @@ int do_pipe_flags(int *fd, int flags) fdw = error; audit_fd_pair(fdr, fdw); - fd_install(fdr, fr); - fd_install(fdw, fw); + fd_install(fdr, files[0]); + fd_install(fdw, files[1]); fd[0] = fdr; fd[1] = fdw; @@ -1119,10 +1098,8 @@ int do_pipe_flags(int *fd, int flags) err_fdr: put_unused_fd(fdr); err_read_pipe: - path_put(&fr->f_path); - put_filp(fr); - err_write_pipe: - free_write_pipe(fw); + fput(files[0]); + fput(files[1]); return error; } diff --git a/fs/splice.c b/fs/splice.c index 7bf08fa22ec..41514dd8946 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -996,6 +996,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, }; ssize_t ret; + sb_start_write(inode->i_sb); + pipe_lock(pipe); splice_from_pipe_begin(&sd); @@ -1034,6 +1036,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, *ppos += ret; balance_dirty_pages_ratelimited_nr(mapping, nr_pages); } + sb_end_write(inode->i_sb); return ret; } diff --git a/fs/super.c b/fs/super.c index 4c5d82f56ec..b05cf47463d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -33,12 +33,19 @@ #include <linux/rculist_bl.h> #include <linux/cleancache.h> #include <linux/fsnotify.h> +#include <linux/lockdep.h> #include "internal.h" LIST_HEAD(super_blocks); DEFINE_SPINLOCK(sb_lock); +static char *sb_writers_name[SB_FREEZE_LEVELS] = { + "sb_writers", + "sb_pagefaults", + "sb_internal", +}; + /* * One thing we have to be careful of with a per-sb shrinker is that we don't * drop the last active reference to the superblock from within the shrinker. @@ -62,7 +69,7 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) return -1; if (!grab_super_passive(sb)) - return !sc->nr_to_scan ? 0 : -1; + return -1; if (sb->s_op && sb->s_op->nr_cached_objects) fs_objects = sb->s_op->nr_cached_objects(sb); @@ -102,6 +109,35 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) return total_objects; } +static int init_sb_writers(struct super_block *s, struct file_system_type *type) +{ + int err; + int i; + + for (i = 0; i < SB_FREEZE_LEVELS; i++) { + err = percpu_counter_init(&s->s_writers.counter[i], 0); + if (err < 0) + goto err_out; + lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], + &type->s_writers_key[i], 0); + } + init_waitqueue_head(&s->s_writers.wait); + init_waitqueue_head(&s->s_writers.wait_unfrozen); + return 0; +err_out: + while (--i >= 0) + percpu_counter_destroy(&s->s_writers.counter[i]); + return err; +} + +static void destroy_sb_writers(struct super_block *s) +{ + int i; + + for (i = 0; i < SB_FREEZE_LEVELS; i++) + percpu_counter_destroy(&s->s_writers.counter[i]); +} + /** * alloc_super - create new superblock * @type: filesystem type superblock should belong to @@ -117,18 +153,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) if (s) { if (security_sb_alloc(s)) { + /* + * We cannot call security_sb_free() without + * security_sb_alloc() succeeding. So bail out manually + */ kfree(s); s = NULL; goto out; } #ifdef CONFIG_SMP s->s_files = alloc_percpu(struct list_head); - if (!s->s_files) { - security_sb_free(s); - kfree(s); - s = NULL; - goto out; - } else { + if (!s->s_files) + goto err_out; + else { int i; for_each_possible_cpu(i) @@ -137,6 +174,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) #else INIT_LIST_HEAD(&s->s_files); #endif + if (init_sb_writers(s, type)) + goto err_out; s->s_flags = flags; s->s_bdi = &default_backing_dev_info; INIT_HLIST_NODE(&s->s_instances); @@ -178,7 +217,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) mutex_init(&s->s_dquot.dqio_mutex); mutex_init(&s->s_dquot.dqonoff_mutex); init_rwsem(&s->s_dquot.dqptr_sem); - init_waitqueue_head(&s->s_wait_unfrozen); s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; @@ -190,6 +228,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) } out: return s; +err_out: + security_sb_free(s); +#ifdef CONFIG_SMP + if (s->s_files) + free_percpu(s->s_files); +#endif + destroy_sb_writers(s); + kfree(s); + s = NULL; + goto out; } /** @@ -203,6 +251,7 @@ static inline void destroy_super(struct super_block *s) #ifdef CONFIG_SMP free_percpu(s->s_files); #endif + destroy_sb_writers(s); security_sb_free(s); WARN_ON(!list_empty(&s->s_mounts)); kfree(s->s_subtype); @@ -651,10 +700,11 @@ struct super_block *get_super_thawed(struct block_device *bdev) { while (1) { struct super_block *s = get_super(bdev); - if (!s || s->s_frozen == SB_UNFROZEN) + if (!s || s->s_writers.frozen == SB_UNFROZEN) return s; up_read(&s->s_umount); - vfs_check_frozen(s, SB_FREEZE_WRITE); + wait_event(s->s_writers.wait_unfrozen, + s->s_writers.frozen == SB_UNFROZEN); put_super(s); } } @@ -732,7 +782,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) int retval; int remount_ro; - if (sb->s_frozen != SB_UNFROZEN) + if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; #ifdef CONFIG_BLOCK @@ -1163,6 +1213,120 @@ out: return ERR_PTR(error); } +/* + * This is an internal function, please use sb_end_{write,pagefault,intwrite} + * instead. + */ +void __sb_end_write(struct super_block *sb, int level) +{ + percpu_counter_dec(&sb->s_writers.counter[level-1]); + /* + * Make sure s_writers are updated before we wake up waiters in + * freeze_super(). + */ + smp_mb(); + if (waitqueue_active(&sb->s_writers.wait)) + wake_up(&sb->s_writers.wait); + rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_); +} +EXPORT_SYMBOL(__sb_end_write); + +#ifdef CONFIG_LOCKDEP +/* + * We want lockdep to tell us about possible deadlocks with freezing but + * it's it bit tricky to properly instrument it. Getting a freeze protection + * works as getting a read lock but there are subtle problems. XFS for example + * gets freeze protection on internal level twice in some cases, which is OK + * only because we already hold a freeze protection also on higher level. Due + * to these cases we have to tell lockdep we are doing trylock when we + * already hold a freeze protection for a higher freeze level. + */ +static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock, + unsigned long ip) +{ + int i; + + if (!trylock) { + for (i = 0; i < level - 1; i++) + if (lock_is_held(&sb->s_writers.lock_map[i])) { + trylock = true; + break; + } + } + rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip); +} +#endif + +/* + * This is an internal function, please use sb_start_{write,pagefault,intwrite} + * instead. + */ +int __sb_start_write(struct super_block *sb, int level, bool wait) +{ +retry: + if (unlikely(sb->s_writers.frozen >= level)) { + if (!wait) + return 0; + wait_event(sb->s_writers.wait_unfrozen, + sb->s_writers.frozen < level); + } + +#ifdef CONFIG_LOCKDEP + acquire_freeze_lock(sb, level, !wait, _RET_IP_); +#endif + percpu_counter_inc(&sb->s_writers.counter[level-1]); + /* + * Make sure counter is updated before we check for frozen. + * freeze_super() first sets frozen and then checks the counter. + */ + smp_mb(); + if (unlikely(sb->s_writers.frozen >= level)) { + __sb_end_write(sb, level); + goto retry; + } + return 1; +} +EXPORT_SYMBOL(__sb_start_write); + +/** + * sb_wait_write - wait until all writers to given file system finish + * @sb: the super for which we wait + * @level: type of writers we wait for (normal vs page fault) + * + * This function waits until there are no writers of given type to given file + * system. Caller of this function should make sure there can be no new writers + * of type @level before calling this function. Otherwise this function can + * livelock. + */ +static void sb_wait_write(struct super_block *sb, int level) +{ + s64 writers; + + /* + * We just cycle-through lockdep here so that it does not complain + * about returning with lock to userspace + */ + rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_); + rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_); + + do { + DEFINE_WAIT(wait); + + /* + * We use a barrier in prepare_to_wait() to separate setting + * of frozen and checking of the counter + */ + prepare_to_wait(&sb->s_writers.wait, &wait, + TASK_UNINTERRUPTIBLE); + + writers = percpu_counter_sum(&sb->s_writers.counter[level-1]); + if (writers) + schedule(); + + finish_wait(&sb->s_writers.wait, &wait); + } while (writers); +} + /** * freeze_super - lock the filesystem and force it into a consistent state * @sb: the super to lock @@ -1170,6 +1334,31 @@ out: * Syncs the super to make sure the filesystem is consistent and calls the fs's * freeze_fs. Subsequent calls to this without first thawing the fs will return * -EBUSY. + * + * During this function, sb->s_writers.frozen goes through these values: + * + * SB_UNFROZEN: File system is normal, all writes progress as usual. + * + * SB_FREEZE_WRITE: The file system is in the process of being frozen. New + * writes should be blocked, though page faults are still allowed. We wait for + * all writes to complete and then proceed to the next stage. + * + * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked + * but internal fs threads can still modify the filesystem (although they + * should not dirty new pages or inodes), writeback can run etc. After waiting + * for all running page faults we sync the filesystem which will clean all + * dirty pages and inodes (no new dirty pages or inodes can be created when + * sync is running). + * + * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs + * modification are blocked (e.g. XFS preallocation truncation on inode + * reclaim). This is usually implemented by blocking new transactions for + * filesystems that have them and need this additional guard. After all + * internal writers are finished we call ->freeze_fs() to finish filesystem + * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is + * mostly auxiliary for filesystems to verify they do not modify frozen fs. + * + * sb->s_writers.frozen is protected by sb->s_umount. */ int freeze_super(struct super_block *sb) { @@ -1177,7 +1366,7 @@ int freeze_super(struct super_block *sb) atomic_inc(&sb->s_active); down_write(&sb->s_umount); - if (sb->s_frozen) { + if (sb->s_writers.frozen != SB_UNFROZEN) { deactivate_locked_super(sb); return -EBUSY; } @@ -1188,33 +1377,53 @@ int freeze_super(struct super_block *sb) } if (sb->s_flags & MS_RDONLY) { - sb->s_frozen = SB_FREEZE_TRANS; - smp_wmb(); + /* Nothing to do really... */ + sb->s_writers.frozen = SB_FREEZE_COMPLETE; up_write(&sb->s_umount); return 0; } - sb->s_frozen = SB_FREEZE_WRITE; + /* From now on, no new normal writers can start */ + sb->s_writers.frozen = SB_FREEZE_WRITE; + smp_wmb(); + + /* Release s_umount to preserve sb_start_write -> s_umount ordering */ + up_write(&sb->s_umount); + + sb_wait_write(sb, SB_FREEZE_WRITE); + + /* Now we go and block page faults... */ + down_write(&sb->s_umount); + sb->s_writers.frozen = SB_FREEZE_PAGEFAULT; smp_wmb(); + sb_wait_write(sb, SB_FREEZE_PAGEFAULT); + + /* All writers are done so after syncing there won't be dirty data */ sync_filesystem(sb); - sb->s_frozen = SB_FREEZE_TRANS; + /* Now wait for internal filesystem counter */ + sb->s_writers.frozen = SB_FREEZE_FS; smp_wmb(); + sb_wait_write(sb, SB_FREEZE_FS); - sync_blockdev(sb->s_bdev); if (sb->s_op->freeze_fs) { ret = sb->s_op->freeze_fs(sb); if (ret) { printk(KERN_ERR "VFS:Filesystem freeze failed\n"); - sb->s_frozen = SB_UNFROZEN; + sb->s_writers.frozen = SB_UNFROZEN; smp_wmb(); - wake_up(&sb->s_wait_unfrozen); + wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return ret; } } + /* + * This is just for debugging purposes so that fs can warn if it + * sees write activity when frozen is set to SB_FREEZE_COMPLETE. + */ + sb->s_writers.frozen = SB_FREEZE_COMPLETE; up_write(&sb->s_umount); return 0; } @@ -1231,7 +1440,7 @@ int thaw_super(struct super_block *sb) int error; down_write(&sb->s_umount); - if (sb->s_frozen == SB_UNFROZEN) { + if (sb->s_writers.frozen == SB_UNFROZEN) { up_write(&sb->s_umount); return -EINVAL; } @@ -1244,16 +1453,15 @@ int thaw_super(struct super_block *sb) if (error) { printk(KERN_ERR "VFS:Filesystem thaw failed\n"); - sb->s_frozen = SB_FREEZE_TRANS; up_write(&sb->s_umount); return error; } } out: - sb->s_frozen = SB_UNFROZEN; + sb->s_writers.frozen = SB_UNFROZEN; smp_wmb(); - wake_up(&sb->s_wait_unfrozen); + wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return 0; diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index a4759833d62..614b2b54488 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -228,6 +228,8 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = 0; if (bb->vm_ops->page_mkwrite) ret = bb->vm_ops->page_mkwrite(vma, vmf); + else + file_update_time(file); sysfs_put_active(attr_sd); return ret; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 15052ff916e..e562dd43f41 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -124,6 +124,12 @@ xfs_setfilesize_trans_alloc( ioend->io_append_trans = tp; /* + * We will pass freeze protection with a transaction. So tell lockdep + * we released it. + */ + rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], + 1, _THIS_IP_); + /* * We hand off the transaction to the completion thread now, so * clear the flag here. */ @@ -199,6 +205,15 @@ xfs_end_io( struct xfs_inode *ip = XFS_I(ioend->io_inode); int error = 0; + if (ioend->io_append_trans) { + /* + * We've got freeze protection passed with the transaction. + * Tell lockdep about it. + */ + rwsem_acquire_read( + &ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], + 0, 1, _THIS_IP_); + } if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { ioend->io_error = -EIO; goto done; @@ -1425,6 +1440,9 @@ out_trans_cancel: if (ioend->io_append_trans) { current_set_flags_nested(&ioend->io_append_trans->t_pflags, PF_FSTRANS); + rwsem_acquire_read( + &inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], + 0, 1, _THIS_IP_); xfs_trans_cancel(ioend->io_append_trans, 0); } out_destroy_ioend: diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index c4559c6e6f2..56afcdb2377 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -770,10 +770,12 @@ xfs_file_aio_write( if (ocount == 0) return 0; - xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); + sb_start_write(inode->i_sb); - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + ret = -EIO; + goto out; + } if (unlikely(file->f_flags & O_DIRECT)) ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); @@ -792,6 +794,8 @@ xfs_file_aio_write( ret = err; } +out: + sb_end_write(inode->i_sb); return ret; } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 1f1535d25a9..0e0232c3b6d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -364,9 +364,15 @@ xfs_fssetdm_by_handle( if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(parfilp); + if (error) + return error; + dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq); - if (IS_ERR(dentry)) + if (IS_ERR(dentry)) { + mnt_drop_write_file(parfilp); return PTR_ERR(dentry); + } if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { error = -XFS_ERROR(EPERM); @@ -382,6 +388,7 @@ xfs_fssetdm_by_handle( fsd.fsd_dmstate); out: + mnt_drop_write_file(parfilp); dput(dentry); return error; } @@ -634,7 +641,11 @@ xfs_ioc_space( if (ioflags & IO_INVIS) attr_flags |= XFS_ATTR_DMI; + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); + mnt_drop_write_file(filp); return -error; } @@ -1163,6 +1174,7 @@ xfs_ioc_fssetxattr( { struct fsxattr fa; unsigned int mask; + int error; if (copy_from_user(&fa, arg, sizeof(fa))) return -EFAULT; @@ -1171,7 +1183,12 @@ xfs_ioc_fssetxattr( if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) mask |= FSX_NONBLOCK; - return -xfs_ioctl_setattr(ip, &fa, mask); + error = mnt_want_write_file(filp); + if (error) + return error; + error = xfs_ioctl_setattr(ip, &fa, mask); + mnt_drop_write_file(filp); + return -error; } STATIC int @@ -1196,6 +1213,7 @@ xfs_ioc_setxflags( struct fsxattr fa; unsigned int flags; unsigned int mask; + int error; if (copy_from_user(&flags, arg, sizeof(flags))) return -EFAULT; @@ -1210,7 +1228,12 @@ xfs_ioc_setxflags( mask |= FSX_NONBLOCK; fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); - return -xfs_ioctl_setattr(ip, &fa, mask); + error = mnt_want_write_file(filp); + if (error) + return error; + error = xfs_ioctl_setattr(ip, &fa, mask); + mnt_drop_write_file(filp); + return -error; } STATIC int @@ -1385,8 +1408,13 @@ xfs_file_ioctl( if (copy_from_user(&dmi, arg, sizeof(dmi))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; + error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, dmi.fsd_dmstate); + mnt_drop_write_file(filp); return -error; } @@ -1434,7 +1462,11 @@ xfs_file_ioctl( if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_swapext(&sxp); + mnt_drop_write_file(filp); return -error; } @@ -1463,9 +1495,14 @@ xfs_file_ioctl( if (copy_from_user(&inout, arg, sizeof(inout))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; + /* input parameter is passed in resblks field of structure */ in = inout.resblks; error = xfs_reserve_blocks(mp, &in, &inout); + mnt_drop_write_file(filp); if (error) return -error; @@ -1496,7 +1533,11 @@ xfs_file_ioctl( if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_growfs_data(mp, &in); + mnt_drop_write_file(filp); return -error; } @@ -1506,7 +1547,11 @@ xfs_file_ioctl( if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_growfs_log(mp, &in); + mnt_drop_write_file(filp); return -error; } @@ -1516,7 +1561,11 @@ xfs_file_ioctl( if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_growfs_rt(mp, &in); + mnt_drop_write_file(filp); return -error; } diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index c4f2da0d2bf..1244274a567 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -600,7 +600,11 @@ xfs_file_compat_ioctl( if (xfs_compat_growfs_data_copyin(&in, arg)) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_growfs_data(mp, &in); + mnt_drop_write_file(filp); return -error; } case XFS_IOC_FSGROWFSRT_32: { @@ -608,7 +612,11 @@ xfs_file_compat_ioctl( if (xfs_compat_growfs_rt_copyin(&in, arg)) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_growfs_rt(mp, &in); + mnt_drop_write_file(filp); return -error; } #endif @@ -627,7 +635,11 @@ xfs_file_compat_ioctl( offsetof(struct xfs_swapext, sx_stat)) || xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) return -XFS_ERROR(EFAULT); + error = mnt_want_write_file(filp); + if (error) + return error; error = xfs_swapext(&sxp); + mnt_drop_write_file(filp); return -error; } case XFS_IOC_FSBULKSTAT_32: diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 915edf6639f..973dff6ad93 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -680,9 +680,9 @@ xfs_iomap_write_unwritten( * the same inode that we complete here and might deadlock * on the iolock. */ - xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); + sb_start_intwrite(mp->m_super); tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); - tp->t_flags |= XFS_TRANS_RESERVE; + tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT; error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 711ca51ca3d..29c2f83d414 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1551,7 +1551,7 @@ xfs_unmountfs( int xfs_fs_writable(xfs_mount_t *mp) { - return !(xfs_test_for_freeze(mp) || XFS_FORCED_SHUTDOWN(mp) || + return !(mp->m_super->s_writers.frozen || XFS_FORCED_SHUTDOWN(mp) || (mp->m_flags & XFS_MOUNT_RDONLY)); } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 8724336a9a0..05a05a7b611 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -311,9 +311,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, #define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */ #define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */ -#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen) -#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l)) - /* * Flags for xfs_mountfs */ diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 97304f10e78..96548176db8 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c @@ -403,7 +403,7 @@ xfs_sync_worker( if (!(mp->m_super->s_flags & MS_ACTIVE) && !(mp->m_flags & XFS_MOUNT_RDONLY)) { /* dgc: errors ignored here */ - if (mp->m_super->s_frozen == SB_UNFROZEN && + if (mp->m_super->s_writers.frozen == SB_UNFROZEN && xfs_log_need_covered(mp)) error = xfs_fs_log_dummy(mp); else diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index fdf324508c5..06ed520a767 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -576,8 +576,12 @@ xfs_trans_alloc( xfs_mount_t *mp, uint type) { - xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); - return _xfs_trans_alloc(mp, type, KM_SLEEP); + xfs_trans_t *tp; + + sb_start_intwrite(mp->m_super); + tp = _xfs_trans_alloc(mp, type, KM_SLEEP); + tp->t_flags |= XFS_TRANS_FREEZE_PROT; + return tp; } xfs_trans_t * @@ -588,6 +592,7 @@ _xfs_trans_alloc( { xfs_trans_t *tp; + WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); atomic_inc(&mp->m_active_trans); tp = kmem_zone_zalloc(xfs_trans_zone, memflags); @@ -611,6 +616,8 @@ xfs_trans_free( xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); atomic_dec(&tp->t_mountp->m_active_trans); + if (tp->t_flags & XFS_TRANS_FREEZE_PROT) + sb_end_intwrite(tp->t_mountp->m_super); xfs_trans_free_dqinfo(tp); kmem_zone_free(xfs_trans_zone, tp); } @@ -643,7 +650,11 @@ xfs_trans_dup( ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(tp->t_ticket != NULL); - ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE); + ntp->t_flags = XFS_TRANS_PERM_LOG_RES | + (tp->t_flags & XFS_TRANS_RESERVE) | + (tp->t_flags & XFS_TRANS_FREEZE_PROT); + /* We gave our writer reference to the new transaction */ + tp->t_flags &= ~XFS_TRANS_FREEZE_PROT; ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; tp->t_blk_res = tp->t_blk_res_used; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index bc2afd52a0b..db056544cbb 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -179,6 +179,8 @@ struct xfs_log_item_desc { #define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ +#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer + count in superblock */ /* * Values for call flags parameter. |