summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c28
-rw-r--r--fs/9p/cache.c204
-rw-r--r--fs/9p/cache.h64
-rw-r--r--fs/9p/fid.c114
-rw-r--r--fs/9p/fid.h5
-rw-r--r--fs/9p/v9fs.c108
-rw-r--r--fs/9p/v9fs.h53
-rw-r--r--fs/9p/v9fs_vfs.h26
-rw-r--r--fs/9p/vfs_addr.c194
-rw-r--r--fs/9p/vfs_dentry.c47
-rw-r--r--fs/9p/vfs_dir.c1
-rw-r--r--fs/9p/vfs_file.c316
-rw-r--r--fs/9p/vfs_inode.c307
-rw-r--r--fs/9p/vfs_inode_dotl.c198
-rw-r--r--fs/9p/vfs_super.c65
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c56
-rw-r--r--fs/block_dev.c19
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/export.c8
-rw-r--r--fs/btrfs/extent-tree.c44
-rw-r--r--fs/btrfs/extent_io.c165
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file.c114
-rw-r--r--fs/btrfs/inode.c148
-rw-r--r--fs/btrfs/ioctl.c7
-rw-r--r--fs/btrfs/lzo.c21
-rw-r--r--fs/btrfs/relocation.c13
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/volumes.c13
-rw-r--r--fs/btrfs/xattr.c6
-rw-r--r--fs/btrfs/xattr.h3
-rw-r--r--fs/cachefiles/namei.c52
-rw-r--r--fs/ceph/dir.c30
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/compat.c69
-rw-r--r--fs/dcache.c121
-rw-r--r--fs/debugfs/inode.c26
-rw-r--r--fs/eventpoll.c95
-rw-r--r--fs/exec.c18
-rw-r--r--fs/exofs/namei.c8
-rw-r--r--fs/exportfs/expfs.c11
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/namei.c17
-rw-r--r--fs/ext2/xattr.h6
-rw-r--r--fs/ext2/xattr_security.c5
-rw-r--r--fs/ext3/ialloc.c5
-rw-r--r--fs/ext3/namei.c15
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext3/xattr.h4
-rw-r--r--fs/ext3/xattr_security.c5
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/ext4/xattr.h4
-rw-r--r--fs/ext4/xattr_security.c5
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/fcntl.c37
-rw-r--r--fs/fhandle.c265
-rw-r--r--fs/file_table.c64
-rw-r--r--fs/fuse/dir.c9
-rw-r--r--fs/fuse/file.c52
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/acl.c7
-rw-r--r--fs/gfs2/aops.c1
-rw-r--r--fs/gfs2/bmap.c20
-rw-r--r--fs/gfs2/dentry.c2
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/gfs2/file.c77
-rw-r--r--fs/gfs2/glock.c410
-rw-r--r--fs/gfs2/glock.h39
-rw-r--r--fs/gfs2/glops.c33
-rw-r--r--fs/gfs2/incore.h7
-rw-r--r--fs/gfs2/inode.c7
-rw-r--r--fs/gfs2/lock_dlm.c14
-rw-r--r--fs/gfs2/log.c32
-rw-r--r--fs/gfs2/lops.c10
-rw-r--r--fs/gfs2/main.c15
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c11
-rw-r--r--fs/gfs2/ops_inode.c10
-rw-r--r--fs/gfs2/quota.c14
-rw-r--r--fs/gfs2/rgrp.c34
-rw-r--r--fs/gfs2/rgrp.h2
-rw-r--r--fs/hfs/dir.c50
-rw-r--r--fs/inode.c63
-rw-r--r--fs/internal.h15
-rw-r--r--fs/isofs/export.c8
-rw-r--r--fs/jffs2/dir.c9
-rw-r--r--fs/jffs2/nodelist.h2
-rw-r--r--fs/jffs2/security.c5
-rw-r--r--fs/jffs2/write.c18
-rw-r--r--fs/jffs2/xattr.h5
-rw-r--r--fs/jfs/jfs_xattr.h5
-rw-r--r--fs/jfs/namei.c13
-rw-r--r--fs/jfs/xattr.c6
-rw-r--r--fs/minix/namei.c8
-rw-r--r--fs/namei.c1493
-rw-r--r--fs/namespace.c22
-rw-r--r--fs/nfs/inode.c9
-rw-r--r--fs/nfs/nfs4_fs.h10
-rw-r--r--fs/nfs/nfs4filelayoutdev.c4
-rw-r--r--fs/nfs/nfs4proc.c131
-rw-r--r--fs/nfs/nfs4state.c29
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/nfsroot.c29
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsctl.c21
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c13
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/nilfs2/btnode.c5
-rw-r--r--fs/nilfs2/btnode.h1
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c8
-rw-r--r--fs/nilfs2/page.c13
-rw-r--r--fs/nilfs2/page.h1
-rw-r--r--fs/nilfs2/segment.c3
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/ocfs2/dcache.c2
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/ocfs2/quota.h3
-rw-r--r--fs/ocfs2/quota_global.c27
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/super.c35
-rw-r--r--fs/ocfs2/xattr.c10
-rw-r--r--fs/ocfs2/xattr.h4
-rw-r--r--fs/open.c137
-rw-r--r--fs/partitions/ldm.c5
-rw-r--r--fs/partitions/osf.c12
-rw-r--r--fs/proc/base.c30
-rw-r--r--fs/proc/inode.c8
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/proc/proc_sysctl.c8
-rw-r--r--fs/reiserfs/inode.c7
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/namei.c15
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/reiserfs/xattr_security.c3
-rw-r--r--fs/stat.c7
-rw-r--r--fs/statfs.c176
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/udf/namei.c18
-rw-r--r--fs/ufs/namei.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c9
-rw-r--r--fs/xfs/xfs_fsops.c3
-rw-r--r--fs/xfs/xfs_mru_cache.c2
161 files changed, 4111 insertions, 2620 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 02a2cf61631..51545529637 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -21,8 +21,8 @@
#include <linux/posix_acl_xattr.h>
#include "xattr.h"
#include "acl.h"
-#include "v9fs_vfs.h"
#include "v9fs.h"
+#include "v9fs_vfs.h"
static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
{
@@ -59,7 +59,8 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
struct v9fs_session_info *v9ses;
v9ses = v9fs_inode2v9ses(inode);
- if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
+ if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
+ ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL);
set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
return 0;
@@ -71,11 +72,15 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
set_cached_acl(inode, ACL_TYPE_ACCESS, pacl);
- posix_acl_release(dacl);
- posix_acl_release(pacl);
} else
retval = -EIO;
+ if (!IS_ERR(dacl))
+ posix_acl_release(dacl);
+
+ if (!IS_ERR(pacl))
+ posix_acl_release(pacl);
+
return retval;
}
@@ -100,9 +105,10 @@ int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
return -ECHILD;
v9ses = v9fs_inode2v9ses(inode);
- if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
+ if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
+ ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
/*
- * On access = client mode get the acl
+ * On access = client and acl = on mode get the acl
* values from the server
*/
return 0;
@@ -128,6 +134,10 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
struct inode *inode = dentry->d_inode;
set_cached_acl(inode, type, acl);
+
+ if (!acl)
+ return 0;
+
/* Set a setxattr request to server */
size = posix_acl_xattr_size(acl->a_count);
buffer = kmalloc(size, GFP_KERNEL);
@@ -177,10 +187,8 @@ int v9fs_acl_chmod(struct dentry *dentry)
int v9fs_set_create_acl(struct dentry *dentry,
struct posix_acl *dpacl, struct posix_acl *pacl)
{
- if (dpacl)
- v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
- if (pacl)
- v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
+ v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
+ v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
posix_acl_release(dpacl);
posix_acl_release(pacl);
return 0;
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 0dbe0d139ac..5b335c5086a 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -33,67 +33,11 @@
#define CACHETAG_LEN 11
-struct kmem_cache *vcookie_cache;
-
struct fscache_netfs v9fs_cache_netfs = {
.name = "9p",
.version = 0,
};
-static void init_once(void *foo)
-{
- struct v9fs_cookie *vcookie = (struct v9fs_cookie *) foo;
- vcookie->fscache = NULL;
- vcookie->qid = NULL;
- inode_init_once(&vcookie->inode);
-}
-
-/**
- * v9fs_init_vcookiecache - initialize a cache for vcookies to maintain
- * vcookie to inode mapping
- *
- * Returns 0 on success.
- */
-
-static int v9fs_init_vcookiecache(void)
-{
- vcookie_cache = kmem_cache_create("vcookie_cache",
- sizeof(struct v9fs_cookie),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
- init_once);
- if (!vcookie_cache)
- return -ENOMEM;
-
- return 0;
-}
-
-/**
- * v9fs_destroy_vcookiecache - destroy the cache of vcookies
- *
- */
-
-static void v9fs_destroy_vcookiecache(void)
-{
- kmem_cache_destroy(vcookie_cache);
-}
-
-int __v9fs_cache_register(void)
-{
- int ret;
- ret = v9fs_init_vcookiecache();
- if (ret < 0)
- return ret;
-
- return fscache_register_netfs(&v9fs_cache_netfs);
-}
-
-void __v9fs_cache_unregister(void)
-{
- v9fs_destroy_vcookiecache();
- fscache_unregister_netfs(&v9fs_cache_netfs);
-}
-
/**
* v9fs_random_cachetag - Generate a random tag to be associated
* with a new cache session.
@@ -133,9 +77,9 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
}
const struct fscache_cookie_def v9fs_cache_session_index_def = {
- .name = "9P.session",
- .type = FSCACHE_COOKIE_TYPE_INDEX,
- .get_key = v9fs_cache_session_get_key,
+ .name = "9P.session",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = v9fs_cache_session_get_key,
};
void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
@@ -163,33 +107,33 @@ void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
void *buffer, uint16_t bufmax)
{
- const struct v9fs_cookie *vcookie = cookie_netfs_data;
- memcpy(buffer, &vcookie->qid->path, sizeof(vcookie->qid->path));
-
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &vcookie->inode,
- vcookie->qid->path);
- return sizeof(vcookie->qid->path);
+ const struct v9fs_inode *v9inode = cookie_netfs_data;
+ memcpy(buffer, &v9inode->fscache_key->path,
+ sizeof(v9inode->fscache_key->path));
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode,
+ v9inode->fscache_key->path);
+ return sizeof(v9inode->fscache_key->path);
}
static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
uint64_t *size)
{
- const struct v9fs_cookie *vcookie = cookie_netfs_data;
- *size = i_size_read(&vcookie->inode);
+ const struct v9fs_inode *v9inode = cookie_netfs_data;
+ *size = i_size_read(&v9inode->vfs_inode);
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &vcookie->inode,
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &v9inode->vfs_inode,
*size);
}
static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
void *buffer, uint16_t buflen)
{
- const struct v9fs_cookie *vcookie = cookie_netfs_data;
- memcpy(buffer, &vcookie->qid->version, sizeof(vcookie->qid->version));
-
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &vcookie->inode,
- vcookie->qid->version);
- return sizeof(vcookie->qid->version);
+ const struct v9fs_inode *v9inode = cookie_netfs_data;
+ memcpy(buffer, &v9inode->fscache_key->version,
+ sizeof(v9inode->fscache_key->version));
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode,
+ v9inode->fscache_key->version);
+ return sizeof(v9inode->fscache_key->version);
}
static enum
@@ -197,13 +141,13 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
const void *buffer,
uint16_t buflen)
{
- const struct v9fs_cookie *vcookie = cookie_netfs_data;
+ const struct v9fs_inode *v9inode = cookie_netfs_data;
- if (buflen != sizeof(vcookie->qid->version))
+ if (buflen != sizeof(v9inode->fscache_key->version))
return FSCACHE_CHECKAUX_OBSOLETE;
- if (memcmp(buffer, &vcookie->qid->version,
- sizeof(vcookie->qid->version)))
+ if (memcmp(buffer, &v9inode->fscache_key->version,
+ sizeof(v9inode->fscache_key->version)))
return FSCACHE_CHECKAUX_OBSOLETE;
return FSCACHE_CHECKAUX_OKAY;
@@ -211,7 +155,7 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
{
- struct v9fs_cookie *vcookie = cookie_netfs_data;
+ struct v9fs_inode *v9inode = cookie_netfs_data;
struct pagevec pvec;
pgoff_t first;
int loop, nr_pages;
@@ -220,7 +164,7 @@ static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
first = 0;
for (;;) {
- nr_pages = pagevec_lookup(&pvec, vcookie->inode.i_mapping,
+ nr_pages = pagevec_lookup(&pvec, v9inode->vfs_inode.i_mapping,
first,
PAGEVEC_SIZE - pagevec_count(&pvec));
if (!nr_pages)
@@ -249,115 +193,114 @@ const struct fscache_cookie_def v9fs_cache_inode_index_def = {
void v9fs_cache_inode_get_cookie(struct inode *inode)
{
- struct v9fs_cookie *vcookie;
+ struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
if (!S_ISREG(inode->i_mode))
return;
- vcookie = v9fs_inode2cookie(inode);
- if (vcookie->fscache)
+ v9inode = V9FS_I(inode);
+ if (v9inode->fscache)
return;
v9ses = v9fs_inode2v9ses(inode);
- vcookie->fscache = fscache_acquire_cookie(v9ses->fscache,
+ v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
&v9fs_cache_inode_index_def,
- vcookie);
+ v9inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode,
- vcookie->fscache);
+ v9inode->fscache);
}
void v9fs_cache_inode_put_cookie(struct inode *inode)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return;
P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode,
- vcookie->fscache);
+ v9inode->fscache);
- fscache_relinquish_cookie(vcookie->fscache, 0);
- vcookie->fscache = NULL;
+ fscache_relinquish_cookie(v9inode->fscache, 0);
+ v9inode->fscache = NULL;
}
void v9fs_cache_inode_flush_cookie(struct inode *inode)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return;
P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode,
- vcookie->fscache);
+ v9inode->fscache);
- fscache_relinquish_cookie(vcookie->fscache, 1);
- vcookie->fscache = NULL;
+ fscache_relinquish_cookie(v9inode->fscache, 1);
+ v9inode->fscache = NULL;
}
void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
struct p9_fid *fid;
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return;
- spin_lock(&vcookie->lock);
+ spin_lock(&v9inode->fscache_lock);
fid = filp->private_data;
if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
v9fs_cache_inode_flush_cookie(inode);
else
v9fs_cache_inode_get_cookie(inode);
- spin_unlock(&vcookie->lock);
+ spin_unlock(&v9inode->fscache_lock);
}
void v9fs_cache_inode_reset_cookie(struct inode *inode)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
struct v9fs_session_info *v9ses;
struct fscache_cookie *old;
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return;
- old = vcookie->fscache;
+ old = v9inode->fscache;
- spin_lock(&vcookie->lock);
- fscache_relinquish_cookie(vcookie->fscache, 1);
+ spin_lock(&v9inode->fscache_lock);
+ fscache_relinquish_cookie(v9inode->fscache, 1);
v9ses = v9fs_inode2v9ses(inode);
- vcookie->fscache = fscache_acquire_cookie(v9ses->fscache,
+ v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
&v9fs_cache_inode_index_def,
- vcookie);
-
+ v9inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p",
- inode, old, vcookie->fscache);
+ inode, old, v9inode->fscache);
- spin_unlock(&vcookie->lock);
+ spin_unlock(&v9inode->fscache_lock);
}
int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
{
struct inode *inode = page->mapping->host;
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
- BUG_ON(!vcookie->fscache);
+ BUG_ON(!v9inode->fscache);
- return fscache_maybe_release_page(vcookie->fscache, page, gfp);
+ return fscache_maybe_release_page(v9inode->fscache, page, gfp);
}
void __v9fs_fscache_invalidate_page(struct page *page)
{
struct inode *inode = page->mapping->host;
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
- BUG_ON(!vcookie->fscache);
+ BUG_ON(!v9inode->fscache);
if (PageFsCache(page)) {
- fscache_wait_on_page_write(vcookie->fscache, page);
+ fscache_wait_on_page_write(v9inode->fscache, page);
BUG_ON(!PageLocked(page));
- fscache_uncache_page(vcookie->fscache, page);
+ fscache_uncache_page(v9inode->fscache, page);
}
}
@@ -380,13 +323,13 @@ static void v9fs_vfs_readpage_complete(struct page *page, void *data,
int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
{
int ret;
- const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ const struct v9fs_inode *v9inode = V9FS_I(inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return -ENOBUFS;
- ret = fscache_read_or_alloc_page(vcookie->fscache,
+ ret = fscache_read_or_alloc_page(v9inode->fscache,
page,
v9fs_vfs_readpage_complete,
NULL,
@@ -418,13 +361,13 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
unsigned *nr_pages)
{
int ret;
- const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ const struct v9fs_inode *v9inode = V9FS_I(inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages);
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return -ENOBUFS;
- ret = fscache_read_or_alloc_pages(vcookie->fscache,
+ ret = fscache_read_or_alloc_pages(v9inode->fscache,
mapping, pages, nr_pages,
v9fs_vfs_readpage_complete,
NULL,
@@ -453,11 +396,22 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
{
int ret;
- const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ const struct v9fs_inode *v9inode = V9FS_I(inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
- ret = fscache_write_page(vcookie->fscache, page, GFP_KERNEL);
+ ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL);
P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret);
if (ret != 0)
v9fs_uncache_page(inode, page);
}
+
+/*
+ * wait for a page to complete writing to the cache
+ */
+void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
+{
+ const struct v9fs_inode *v9inode = V9FS_I(inode);
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
+ if (PageFsCache(page))
+ fscache_wait_on_page_write(v9inode->fscache, page);
+}
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index a94192bfaee..049507a5b01 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -25,20 +25,6 @@
#include <linux/fscache.h>
#include <linux/spinlock.h>
-extern struct kmem_cache *vcookie_cache;
-
-struct v9fs_cookie {
- spinlock_t lock;
- struct inode inode;
- struct fscache_cookie *fscache;
- struct p9_qid *qid;
-};
-
-static inline struct v9fs_cookie *v9fs_inode2cookie(const struct inode *inode)
-{
- return container_of(inode, struct v9fs_cookie, inode);
-}
-
extern struct fscache_netfs v9fs_cache_netfs;
extern const struct fscache_cookie_def v9fs_cache_session_index_def;
extern const struct fscache_cookie_def v9fs_cache_inode_index_def;
@@ -64,23 +50,8 @@ extern int __v9fs_readpages_from_fscache(struct inode *inode,
struct list_head *pages,
unsigned *nr_pages);
extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page);
-
-
-/**
- * v9fs_cache_register - Register v9fs file system with the cache
- */
-static inline int v9fs_cache_register(void)
-{
- return __v9fs_cache_register();
-}
-
-/**
- * v9fs_cache_unregister - Unregister v9fs from the cache
- */
-static inline void v9fs_cache_unregister(void)
-{
- __v9fs_cache_unregister();
-}
+extern void __v9fs_fscache_wait_on_page_write(struct inode *inode,
+ struct page *page);
static inline int v9fs_fscache_release_page(struct page *page,
gfp_t gfp)
@@ -117,28 +88,27 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
- fscache_uncache_page(vcookie->fscache, page);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ fscache_uncache_page(v9inode->fscache, page);
BUG_ON(PageFsCache(page));
}
-static inline void v9fs_vcookie_set_qid(struct inode *inode,
+static inline void v9fs_fscache_set_key(struct inode *inode,
struct p9_qid *qid)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
- spin_lock(&vcookie->lock);
- vcookie->qid = qid;
- spin_unlock(&vcookie->lock);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ spin_lock(&v9inode->fscache_lock);
+ v9inode->fscache_key = qid;
+ spin_unlock(&v9inode->fscache_lock);
}
-#else /* CONFIG_9P_FSCACHE */
-
-static inline int v9fs_cache_register(void)
+static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
+ struct page *page)
{
- return 1;
+ return __v9fs_fscache_wait_on_page_write(inode, page);
}
-static inline void v9fs_cache_unregister(void) {}
+#else /* CONFIG_9P_FSCACHE */
static inline int v9fs_fscache_release_page(struct page *page,
gfp_t gfp) {
@@ -168,9 +138,11 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
{}
-static inline void v9fs_vcookie_set_qid(struct inode *inode,
- struct p9_qid *qid)
-{}
+static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
+ struct page *page)
+{
+ return;
+}
#endif /* CONFIG_9P_FSCACHE */
#endif /* _9P_CACHE_H */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index b00223c99d7..cd63e002d82 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -125,46 +125,17 @@ err_out:
return -ENOMEM;
}
-/**
- * v9fs_fid_lookup - lookup for a fid, try to walk if not found
- * @dentry: dentry to look for fid in
- *
- * Look for a fid in the specified dentry for the current user.
- * If no fid is found, try to create one walking from a fid from the parent
- * dentry (if it has one), or the root dentry. If the user haven't accessed
- * the fs yet, attach now and walk from the root.
- */
-
-struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
+static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
+ uid_t uid, int any)
{
- int i, n, l, clone, any, access;
- u32 uid;
- struct p9_fid *fid, *old_fid = NULL;
struct dentry *ds;
- struct v9fs_session_info *v9ses;
char **wnames, *uname;
+ int i, n, l, clone, access;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid, *old_fid = NULL;
v9ses = v9fs_inode2v9ses(dentry->d_inode);
access = v9ses->flags & V9FS_ACCESS_MASK;
- switch (access) {
- case V9FS_ACCESS_SINGLE:
- case V9FS_ACCESS_USER:
- case V9FS_ACCESS_CLIENT:
- uid = current_fsuid();
- any = 0;
- break;
-
- case V9FS_ACCESS_ANY:
- uid = v9ses->uid;
- any = 1;
- break;
-
- default:
- uid = ~0;
- any = 0;
- break;
- }
-
fid = v9fs_fid_find(dentry, uid, any);
if (fid)
return fid;
@@ -250,6 +221,45 @@ err_out:
return fid;
}
+/**
+ * v9fs_fid_lookup - lookup for a fid, try to walk if not found
+ * @dentry: dentry to look for fid in
+ *
+ * Look for a fid in the specified dentry for the current user.
+ * If no fid is found, try to create one walking from a fid from the parent
+ * dentry (if it has one), or the root dentry. If the user haven't accessed
+ * the fs yet, attach now and walk from the root.
+ */
+
+struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
+{
+ uid_t uid;
+ int any, access;
+ struct v9fs_session_info *v9ses;
+
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ access = v9ses->flags & V9FS_ACCESS_MASK;
+ switch (access) {
+ case V9FS_ACCESS_SINGLE:
+ case V9FS_ACCESS_USER:
+ case V9FS_ACCESS_CLIENT:
+ uid = current_fsuid();
+ any = 0;
+ break;
+
+ case V9FS_ACCESS_ANY:
+ uid = v9ses->uid;
+ any = 1;
+ break;
+
+ default:
+ uid = ~0;
+ any = 0;
+ break;
+ }
+ return v9fs_fid_lookup_with_uid(dentry, uid, any);
+}
+
struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
{
struct p9_fid *fid, *ret;
@@ -261,3 +271,39 @@ struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
ret = p9_client_walk(fid, 0, NULL, 1);
return ret;
}
+
+static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid)
+{
+ struct p9_fid *fid, *ret;
+
+ fid = v9fs_fid_lookup_with_uid(dentry, uid, 0);
+ if (IS_ERR(fid))
+ return fid;
+
+ ret = p9_client_walk(fid, 0, NULL, 1);
+ return ret;
+}
+
+struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
+{
+ int err;
+ struct p9_fid *fid;
+
+ fid = v9fs_fid_clone_with_uid(dentry, 0);
+ if (IS_ERR(fid))
+ goto error_out;
+ /*
+ * writeback fid will only be used to write back the
+ * dirty pages. We always request for the open fid in read-write
+ * mode so that a partial page write which result in page
+ * read can work.
+ */
+ err = p9_client_open(fid, O_RDWR);
+ if (err < 0) {
+ p9_client_clunk(fid);
+ fid = ERR_PTR(err);
+ goto error_out;
+ }
+error_out:
+ return fid;
+}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index c3bbd6af996..bb0b6e7f58f 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -19,7 +19,8 @@
* Boston, MA 02111-1301 USA
*
*/
-
+#ifndef FS_9P_FID_H
+#define FS_9P_FID_H
#include <linux/list.h>
/**
@@ -45,3 +46,5 @@ struct v9fs_dentry {
struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
struct p9_fid *v9fs_fid_clone(struct dentry *dentry);
int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
+struct p9_fid *v9fs_writeback_fid(struct dentry *dentry);
+#endif
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 2f77cd33ba8..c82b017f51f 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -39,6 +39,7 @@
static DEFINE_SPINLOCK(v9fs_sessionlist_lock);
static LIST_HEAD(v9fs_sessionlist);
+struct kmem_cache *v9fs_inode_cache;
/*
* Option Parsing (code inspired by NFS code)
@@ -55,7 +56,7 @@ enum {
/* Cache options */
Opt_cache_loose, Opt_fscache,
/* Access options */
- Opt_access,
+ Opt_access, Opt_posixacl,
/* Error token */
Opt_err
};
@@ -73,6 +74,7 @@ static const match_table_t tokens = {
{Opt_fscache, "fscache"},
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
+ {Opt_posixacl, "posixacl"},
{Opt_err, NULL}
};
@@ -194,15 +196,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
else if (strcmp(s, "any") == 0)
v9ses->flags |= V9FS_ACCESS_ANY;
else if (strcmp(s, "client") == 0) {
-#ifdef CONFIG_9P_FS_POSIX_ACL
v9ses->flags |= V9FS_ACCESS_CLIENT;
-#else
- P9_DPRINTK(P9_DEBUG_ERROR,
- "access=client option not supported\n");
- kfree(s);
- ret = -EINVAL;
- goto free_and_return;
-#endif
} else {
v9ses->flags |= V9FS_ACCESS_SINGLE;
v9ses->uid = simple_strtoul(s, &e, 10);
@@ -212,6 +206,16 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
kfree(s);
break;
+ case Opt_posixacl:
+#ifdef CONFIG_9P_FS_POSIX_ACL
+ v9ses->flags |= V9FS_POSIX_ACL;
+#else
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "Not defined CONFIG_9P_FS_POSIX_ACL. "
+ "Ignoring posixacl option\n");
+#endif
+ break;
+
default:
continue;
}
@@ -260,19 +264,12 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
list_add(&v9ses->slist, &v9fs_sessionlist);
spin_unlock(&v9fs_sessionlist_lock);
- v9ses->flags = V9FS_ACCESS_USER;
strcpy(v9ses->uname, V9FS_DEFUSER);
strcpy(v9ses->aname, V9FS_DEFANAME);
v9ses->uid = ~0;
v9ses->dfltuid = V9FS_DEFUID;
v9ses->dfltgid = V9FS_DEFGID;
- rc = v9fs_parse_options(v9ses, data);
- if (rc < 0) {
- retval = rc;
- goto error;
- }
-
v9ses->clnt = p9_client_create(dev_name, data);
if (IS_ERR(v9ses->clnt)) {
retval = PTR_ERR(v9ses->clnt);
@@ -281,10 +278,20 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
goto error;
}
- if (p9_is_proto_dotl(v9ses->clnt))
+ v9ses->flags = V9FS_ACCESS_USER;
+
+ if (p9_is_proto_dotl(v9ses->clnt)) {
+ v9ses->flags = V9FS_ACCESS_CLIENT;
v9ses->flags |= V9FS_PROTO_2000L;
- else if (p9_is_proto_dotu(v9ses->clnt))
+ } else if (p9_is_proto_dotu(v9ses->clnt)) {
v9ses->flags |= V9FS_PROTO_2000U;
+ }
+
+ rc = v9fs_parse_options(v9ses, data);
+ if (rc < 0) {
+ retval = rc;
+ goto error;
+ }
v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
@@ -306,6 +313,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
v9ses->flags |= V9FS_ACCESS_ANY;
v9ses->uid = ~0;
}
+ if (!v9fs_proto_dotl(v9ses) ||
+ !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
+ /*
+ * We support ACL checks on clinet only if the protocol is
+ * 9P2000.L and access is V9FS_ACCESS_CLIENT.
+ */
+ v9ses->flags &= ~V9FS_ACL_MASK;
+ }
fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0,
v9ses->aname);
@@ -467,6 +482,63 @@ static void v9fs_sysfs_cleanup(void)
kobject_put(v9fs_kobj);
}
+static void v9fs_inode_init_once(void *foo)
+{
+ struct v9fs_inode *v9inode = (struct v9fs_inode *)foo;
+#ifdef CONFIG_9P_FSCACHE
+ v9inode->fscache = NULL;
+ v9inode->fscache_key = NULL;
+#endif
+ inode_init_once(&v9inode->vfs_inode);
+}
+
+/**
+ * v9fs_init_inode_cache - initialize a cache for 9P
+ * Returns 0 on success.
+ */
+static int v9fs_init_inode_cache(void)
+{
+ v9fs_inode_cache = kmem_cache_create("v9fs_inode_cache",
+ sizeof(struct v9fs_inode),
+ 0, (SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD),
+ v9fs_inode_init_once);
+ if (!v9fs_inode_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * v9fs_destroy_inode_cache - destroy the cache of 9P inode
+ *
+ */
+static void v9fs_destroy_inode_cache(void)
+{
+ kmem_cache_destroy(v9fs_inode_cache);
+}
+
+static int v9fs_cache_register(void)
+{
+ int ret;
+ ret = v9fs_init_inode_cache();
+ if (ret < 0)
+ return ret;
+#ifdef CONFIG_9P_FSCACHE
+ return fscache_register_netfs(&v9fs_cache_netfs);
+#else
+ return ret;
+#endif
+}
+
+static void v9fs_cache_unregister(void)
+{
+ v9fs_destroy_inode_cache();
+#ifdef CONFIG_9P_FSCACHE
+ fscache_unregister_netfs(&v9fs_cache_netfs);
+#endif
+}
+
/**
* init_v9fs - Initialize module
*
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index c4b5d8864f0..bd8496db135 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,9 @@
* Boston, MA 02111-1301 USA
*
*/
+#ifndef FS_9P_V9FS_H
+#define FS_9P_V9FS_H
+
#include <linux/backing-dev.h>
/**
@@ -28,8 +31,10 @@
* @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions
* @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy
* @V9FS_ACCESS_USER: a new attach will be issued for every user (default)
+ * @V9FS_ACCESS_CLIENT: Just like user, but access check is performed on client.
* @V9FS_ACCESS_ANY: use a single attach for all users
* @V9FS_ACCESS_MASK: bit mask of different ACCESS options
+ * @V9FS_POSIX_ACL: POSIX ACLs are enforced
*
* Session flags reflect options selected by users at mount time
*/
@@ -37,13 +42,15 @@
V9FS_ACCESS_USER | \
V9FS_ACCESS_CLIENT)
#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY
+#define V9FS_ACL_MASK V9FS_POSIX_ACL
enum p9_session_flags {
V9FS_PROTO_2000U = 0x01,
V9FS_PROTO_2000L = 0x02,
V9FS_ACCESS_SINGLE = 0x04,
V9FS_ACCESS_USER = 0x08,
- V9FS_ACCESS_CLIENT = 0x10
+ V9FS_ACCESS_CLIENT = 0x10,
+ V9FS_POSIX_ACL = 0x20
};
/* possible values of ->cache */
@@ -109,8 +116,28 @@ struct v9fs_session_info {
struct list_head slist; /* list of sessions registered with v9fs */
struct backing_dev_info bdi;
struct rw_semaphore rename_sem;
+ struct p9_fid *root_fid; /* Used for file system sync */
+};
+
+/* cache_validity flags */
+#define V9FS_INO_INVALID_ATTR 0x01
+
+struct v9fs_inode {
+#ifdef CONFIG_9P_FSCACHE
+ spinlock_t fscache_lock;
+ struct fscache_cookie *fscache;
+ struct p9_qid *fscache_key;
+#endif
+ unsigned int cache_validity;
+ struct p9_fid *writeback_fid;
+ struct inode vfs_inode;
};
+static inline struct v9fs_inode *V9FS_I(const struct inode *inode)
+{
+ return container_of(inode, struct v9fs_inode, vfs_inode);
+}
+
struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
char *);
extern void v9fs_session_close(struct v9fs_session_info *v9ses);
@@ -124,16 +151,15 @@ extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
void *p);
-extern struct inode *v9fs_inode(struct v9fs_session_info *v9ses,
- struct p9_fid *fid,
- struct super_block *sb);
-
+extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
+ struct p9_fid *fid,
+ struct super_block *sb);
extern const struct inode_operations v9fs_dir_inode_operations_dotl;
extern const struct inode_operations v9fs_file_inode_operations_dotl;
extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
-extern struct inode *v9fs_inode_dotl(struct v9fs_session_info *v9ses,
- struct p9_fid *fid,
- struct super_block *sb);
+extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
+ struct p9_fid *fid,
+ struct super_block *sb);
/* other default globals */
#define V9FS_PORT 564
@@ -158,7 +184,7 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
}
/**
- * v9fs_inode_from_fid - Helper routine to populate an inode by
+ * v9fs_get_inode_from_fid - Helper routine to populate an inode by
* issuing a attribute request
* @v9ses: session information
* @fid: fid to issue attribute request for
@@ -166,11 +192,12 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
*
*/
static inline struct inode *
-v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
{
if (v9fs_proto_dotl(v9ses))
- return v9fs_inode_dotl(v9ses, fid, sb);
+ return v9fs_inode_from_fid_dotl(v9ses, fid, sb);
else
- return v9fs_inode(v9ses, fid, sb);
+ return v9fs_inode_from_fid(v9ses, fid, sb);
}
+#endif
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index b789f8e597e..4014160903a 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -20,6 +20,8 @@
* Boston, MA 02111-1301 USA
*
*/
+#ifndef FS_9P_V9FS_VFS_H
+#define FS_9P_V9FS_VFS_H
/* plan9 semantics are that created files are implicitly opened.
* But linux semantics are that you call create, then open.
@@ -36,6 +38,7 @@
* unlink calls remove, which is an implicit clunk. So we have to track
* that kind of thing so that we don't try to clunk a dead fid.
*/
+#define P9_LOCK_TIMEOUT (30*HZ)
extern struct file_system_type v9fs_fs_type;
extern const struct address_space_operations v9fs_addr_operations;
@@ -45,13 +48,15 @@ extern const struct file_operations v9fs_dir_operations;
extern const struct file_operations v9fs_dir_operations_dotl;
extern const struct dentry_operations v9fs_dentry_operations;
extern const struct dentry_operations v9fs_cached_dentry_operations;
+extern const struct file_operations v9fs_cached_file_operations;
+extern const struct file_operations v9fs_cached_file_operations_dotl;
+extern struct kmem_cache *v9fs_inode_cache;
-#ifdef CONFIG_9P_FSCACHE
struct inode *v9fs_alloc_inode(struct super_block *sb);
void v9fs_destroy_inode(struct inode *inode);
-#endif
-
struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+int v9fs_init_inode(struct v9fs_session_info *v9ses,
+ struct inode *inode, int mode);
void v9fs_evict_inode(struct inode *inode);
ino_t v9fs_qid2ino(struct p9_qid *qid);
void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
@@ -62,8 +67,19 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
int v9fs_uflags2omode(int uflags, int extended);
ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
+ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
void v9fs_blank_wstat(struct p9_wstat *wstat);
int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
int v9fs_file_fsync_dotl(struct file *filp, int datasync);
-
-#define P9_LOCK_TIMEOUT (30*HZ)
+ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
+ const char __user *, size_t, loff_t *, int);
+int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
+int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode);
+static inline void v9fs_invalidate_inode_attr(struct inode *inode)
+{
+ struct v9fs_inode *v9inode;
+ v9inode = V9FS_I(inode);
+ v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
+ return;
+}
+#endif
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index b7f2a8e3863..2524e4cbb8e 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -39,16 +39,16 @@
#include "v9fs.h"
#include "v9fs_vfs.h"
#include "cache.h"
+#include "fid.h"
/**
- * v9fs_vfs_readpage - read an entire page in from 9P
+ * v9fs_fid_readpage - read an entire page in from 9P
*
- * @filp: file being read
+ * @fid: fid being read
* @page: structure to page
*
*/
-
-static int v9fs_vfs_readpage(struct file *filp, struct page *page)
+static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
{
int retval;
loff_t offset;
@@ -67,7 +67,7 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
buffer = kmap(page);
offset = page_offset(page);
- retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset);
+ retval = v9fs_fid_readn(fid, buffer, NULL, PAGE_CACHE_SIZE, offset);
if (retval < 0) {
v9fs_uncache_page(inode, page);
goto done;
@@ -87,6 +87,19 @@ done:
}
/**
+ * v9fs_vfs_readpage - read an entire page in from 9P
+ *
+ * @filp: file being read
+ * @page: structure to page
+ *
+ */
+
+static int v9fs_vfs_readpage(struct file *filp, struct page *page)
+{
+ return v9fs_fid_readpage(filp->private_data, page);
+}
+
+/**
* v9fs_vfs_readpages - read a set of pages from 9P
*
* @filp: file being read
@@ -124,7 +137,6 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
{
if (PagePrivate(page))
return 0;
-
return v9fs_fscache_release_page(page, gfp);
}
@@ -137,20 +149,89 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
static void v9fs_invalidate_page(struct page *page, unsigned long offset)
{
+ /*
+ * If called with zero offset, we should release
+ * the private state assocated with the page
+ */
if (offset == 0)
v9fs_fscache_invalidate_page(page);
}
+static int v9fs_vfs_writepage_locked(struct page *page)
+{
+ char *buffer;
+ int retval, len;
+ loff_t offset, size;
+ mm_segment_t old_fs;
+ struct v9fs_inode *v9inode;
+ struct inode *inode = page->mapping->host;
+
+ v9inode = V9FS_I(inode);
+ size = i_size_read(inode);
+ if (page->index == size >> PAGE_CACHE_SHIFT)
+ len = size & ~PAGE_CACHE_MASK;
+ else
+ len = PAGE_CACHE_SIZE;
+
+ set_page_writeback(page);
+
+ buffer = kmap(page);
+ offset = page_offset(page);
+
+ old_fs = get_fs();
+ set_fs(get_ds());
+ /* We should have writeback_fid always set */
+ BUG_ON(!v9inode->writeback_fid);
+
+ retval = v9fs_file_write_internal(inode,
+ v9inode->writeback_fid,
+ (__force const char __user *)buffer,
+ len, &offset, 0);
+ if (retval > 0)
+ retval = 0;
+
+ set_fs(old_fs);
+ kunmap(page);
+ end_page_writeback(page);
+ return retval;
+}
+
+static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+ int retval;
+
+ retval = v9fs_vfs_writepage_locked(page);
+ if (retval < 0) {
+ if (retval == -EAGAIN) {
+ redirty_page_for_writepage(wbc, page);
+ retval = 0;
+ } else {
+ SetPageError(page);
+ mapping_set_error(page->mapping, retval);
+ }
+ } else
+ retval = 0;
+
+ unlock_page(page);
+ return retval;
+}
+
/**
* v9fs_launder_page - Writeback a dirty page
- * Since the writes go directly to the server, we simply return a 0
- * here to indicate success.
- *
* Returns 0 on success.
*/
static int v9fs_launder_page(struct page *page)
{
+ int retval;
+ struct inode *inode = page->mapping->host;
+
+ v9fs_fscache_wait_on_page_write(inode, page);
+ if (clear_page_dirty_for_io(page)) {
+ retval = v9fs_vfs_writepage_locked(page);
+ if (retval)
+ return retval;
+ }
return 0;
}
@@ -173,9 +254,15 @@ static int v9fs_launder_page(struct page *page)
* with an error.
*
*/
-ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
- loff_t pos, unsigned long nr_segs)
+static ssize_t
+v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+ loff_t pos, unsigned long nr_segs)
{
+ /*
+ * FIXME
+ * Now that we do caching with cache mode enabled, We need
+ * to support direct IO
+ */
P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) "
"off/no(%lld/%lu) EINVAL\n",
iocb->ki_filp->f_path.dentry->d_name.name,
@@ -183,11 +270,84 @@ ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
return -EINVAL;
}
+
+static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata)
+{
+ int retval = 0;
+ struct page *page;
+ struct v9fs_inode *v9inode;
+ pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ struct inode *inode = mapping->host;
+
+ v9inode = V9FS_I(inode);
+start:
+ page = grab_cache_page_write_begin(mapping, index, flags);
+ if (!page) {
+ retval = -ENOMEM;
+ goto out;
+ }
+ BUG_ON(!v9inode->writeback_fid);
+ if (PageUptodate(page))
+ goto out;
+
+ if (len == PAGE_CACHE_SIZE)
+ goto out;
+
+ retval = v9fs_fid_readpage(v9inode->writeback_fid, page);
+ page_cache_release(page);
+ if (!retval)
+ goto start;
+out:
+ *pagep = page;
+ return retval;
+}
+
+static int v9fs_write_end(struct file *filp, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ loff_t last_pos = pos + copied;
+ struct inode *inode = page->mapping->host;
+
+ if (unlikely(copied < len)) {
+ /*
+ * zero out the rest of the area
+ */
+ unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+
+ zero_user(page, from + copied, len - copied);
+ flush_dcache_page(page);
+ }
+
+ if (!PageUptodate(page))
+ SetPageUptodate(page);
+ /*
+ * No need to use i_size_read() here, the i_size
+ * cannot change under us because we hold the i_mutex.
+ */
+ if (last_pos > inode->i_size) {
+ inode_add_bytes(inode, last_pos - inode->i_size);
+ i_size_write(inode, last_pos);
+ }
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+
+ return copied;
+}
+
+
const struct address_space_operations v9fs_addr_operations = {
- .readpage = v9fs_vfs_readpage,
- .readpages = v9fs_vfs_readpages,
- .releasepage = v9fs_release_page,
- .invalidatepage = v9fs_invalidate_page,
- .launder_page = v9fs_launder_page,
- .direct_IO = v9fs_direct_IO,
+ .readpage = v9fs_vfs_readpage,
+ .readpages = v9fs_vfs_readpages,
+ .set_page_dirty = __set_page_dirty_nobuffers,
+ .writepage = v9fs_vfs_writepage,
+ .write_begin = v9fs_write_begin,
+ .write_end = v9fs_write_end,
+ .releasepage = v9fs_release_page,
+ .invalidatepage = v9fs_invalidate_page,
+ .launder_page = v9fs_launder_page,
+ .direct_IO = v9fs_direct_IO,
};
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 233b7d4ffe5..b6a3b9f7fe4 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -63,20 +63,15 @@ static int v9fs_dentry_delete(const struct dentry *dentry)
* v9fs_cached_dentry_delete - called when dentry refcount equals 0
* @dentry: dentry in question
*
- * Only return 1 if our inode is invalid. Only non-synthetic files
- * (ones without mtime == 0) should be calling this function.
- *
*/
-
static int v9fs_cached_dentry_delete(const struct dentry *dentry)
{
- struct inode *inode = dentry->d_inode;
- P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
- dentry);
+ P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n",
+ dentry->d_name.name, dentry);
- if(!inode)
+ /* Don't cache negative dentries */
+ if (!dentry->d_inode)
return 1;
-
return 0;
}
@@ -105,7 +100,41 @@ static void v9fs_dentry_release(struct dentry *dentry)
}
}
+static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ struct p9_fid *fid;
+ struct inode *inode;
+ struct v9fs_inode *v9inode;
+
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = dentry->d_inode;
+ if (!inode)
+ goto out_valid;
+
+ v9inode = V9FS_I(inode);
+ if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) {
+ int retval;
+ struct v9fs_session_info *v9ses;
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ v9ses = v9fs_inode2v9ses(inode);
+ if (v9fs_proto_dotl(v9ses))
+ retval = v9fs_refresh_inode_dotl(fid, inode);
+ else
+ retval = v9fs_refresh_inode(fid, inode);
+ if (retval <= 0)
+ return retval;
+ }
+out_valid:
+ return 1;
+}
+
const struct dentry_operations v9fs_cached_dentry_operations = {
+ .d_revalidate = v9fs_lookup_revalidate,
.d_delete = v9fs_cached_dentry_delete,
.d_release = v9fs_dentry_release,
};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index b84ebe8cefe..9c2bdda5cd9 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -295,7 +295,6 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
P9_DPRINTK(P9_DEBUG_VFS,
"v9fs_dir_release: inode: %p filp: %p fid: %d\n",
inode, filp, fid ? fid->fid : -1);
- filemap_write_and_wait(inode->i_mapping);
if (fid)
p9_client_clunk(fid);
return 0;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 240c3067439..78bcb97c342 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -44,8 +44,7 @@
#include "fid.h"
#include "cache.h"
-static const struct file_operations v9fs_cached_file_operations;
-static const struct file_operations v9fs_cached_file_operations_dotl;
+static const struct vm_operations_struct v9fs_file_vm_ops;
/**
* v9fs_file_open - open a file (or directory)
@@ -57,11 +56,13 @@ static const struct file_operations v9fs_cached_file_operations_dotl;
int v9fs_file_open(struct inode *inode, struct file *file)
{
int err;
+ struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
struct p9_fid *fid;
int omode;
P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
+ v9inode = V9FS_I(inode);
v9ses = v9fs_inode2v9ses(inode);
if (v9fs_proto_dotl(v9ses))
omode = file->f_flags;
@@ -89,20 +90,30 @@ int v9fs_file_open(struct inode *inode, struct file *file)
}
file->private_data = fid;
- if ((fid->qid.version) && (v9ses->cache)) {
- P9_DPRINTK(P9_DEBUG_VFS, "cached");
- /* enable cached file options */
- if(file->f_op == &v9fs_file_operations)
- file->f_op = &v9fs_cached_file_operations;
- else if (file->f_op == &v9fs_file_operations_dotl)
- file->f_op = &v9fs_cached_file_operations_dotl;
-
+ if (v9ses->cache && !v9inode->writeback_fid) {
+ /*
+ * clone a fid and add it to writeback_fid
+ * we do it during open time instead of
+ * page dirty time via write_begin/page_mkwrite
+ * because we want write after unlink usecase
+ * to work.
+ */
+ fid = v9fs_writeback_fid(file->f_path.dentry);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ goto out_error;
+ }
+ v9inode->writeback_fid = (void *) fid;
+ }
#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache)
v9fs_cache_inode_set_cookie(inode, file);
#endif
- }
-
return 0;
+out_error:
+ p9_client_clunk(file->private_data);
+ file->private_data = NULL;
+ return err;
}
/**
@@ -335,25 +346,22 @@ out_err:
}
/**
- * v9fs_file_readn - read from a file
- * @filp: file pointer to read
+ * v9fs_fid_readn - read from a fid
+ * @fid: fid to read
* @data: data buffer to read data into
* @udata: user data buffer to read data into
* @count: size of buffer
* @offset: offset at which to read data
*
*/
-
ssize_t
-v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
+v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count,
u64 offset)
{
int n, total, size;
- struct p9_fid *fid = filp->private_data;
P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
- (long long unsigned) offset, count);
-
+ (long long unsigned) offset, count);
n = 0;
total = 0;
size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
@@ -379,6 +387,22 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
}
/**
+ * v9fs_file_readn - read from a file
+ * @filp: file pointer to read
+ * @data: data buffer to read data into
+ * @udata: user data buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+ssize_t
+v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
+ u64 offset)
+{
+ return v9fs_fid_readn(filp->private_data, data, udata, count, offset);
+}
+
+/**
* v9fs_file_read - read from a file
* @filp: file pointer to read
* @udata: user data buffer to read data into
@@ -410,45 +434,22 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
return ret;
}
-/**
- * v9fs_file_write - write to a file
- * @filp: file pointer to write
- * @data: data buffer to write data from
- * @count: size of buffer
- * @offset: offset at which to write data
- *
- */
-
-static ssize_t
-v9fs_file_write(struct file *filp, const char __user * data,
- size_t count, loff_t * offset)
+ssize_t
+v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
+ const char __user *data, size_t count,
+ loff_t *offset, int invalidate)
{
- ssize_t retval;
- size_t total = 0;
int n;
- struct p9_fid *fid;
+ loff_t i_size;
+ size_t total = 0;
struct p9_client *clnt;
- struct inode *inode = filp->f_path.dentry->d_inode;
loff_t origin = *offset;
unsigned long pg_start, pg_end;
P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
(int)count, (int)*offset);
- fid = filp->private_data;
clnt = fid->clnt;
-
- retval = generic_write_checks(filp, &origin, &count, 0);
- if (retval)
- goto out;
-
- retval = -EINVAL;
- if ((ssize_t) count < 0)
- goto out;
- retval = 0;
- if (!count)
- goto out;
-
do {
n = p9_client_write(fid, NULL, data+total, origin+total, count);
if (n <= 0)
@@ -457,25 +458,60 @@ v9fs_file_write(struct file *filp, const char __user * data,
total += n;
} while (count > 0);
- if (total > 0) {
+ if (invalidate && (total > 0)) {
pg_start = origin >> PAGE_CACHE_SHIFT;
pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
if (inode->i_mapping && inode->i_mapping->nrpages)
invalidate_inode_pages2_range(inode->i_mapping,
pg_start, pg_end);
*offset += total;
- i_size_write(inode, i_size_read(inode) + total);
- inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
+ i_size = i_size_read(inode);
+ if (*offset > i_size) {
+ inode_add_bytes(inode, *offset - i_size);
+ i_size_write(inode, *offset);
+ }
}
-
if (n < 0)
- retval = n;
- else
- retval = total;
+ return n;
+
+ return total;
+}
+
+/**
+ * v9fs_file_write - write to a file
+ * @filp: file pointer to write
+ * @data: data buffer to write data from
+ * @count: size of buffer
+ * @offset: offset at which to write data
+ *
+ */
+static ssize_t
+v9fs_file_write(struct file *filp, const char __user * data,
+ size_t count, loff_t *offset)
+{
+ ssize_t retval = 0;
+ loff_t origin = *offset;
+
+
+ retval = generic_write_checks(filp, &origin, &count, 0);
+ if (retval)
+ goto out;
+
+ retval = -EINVAL;
+ if ((ssize_t) count < 0)
+ goto out;
+ retval = 0;
+ if (!count)
+ goto out;
+
+ return v9fs_file_write_internal(filp->f_path.dentry->d_inode,
+ filp->private_data,
+ data, count, offset, 1);
out:
return retval;
}
+
static int v9fs_file_fsync(struct file *filp, int datasync)
{
struct p9_fid *fid;
@@ -505,28 +541,182 @@ int v9fs_file_fsync_dotl(struct file *filp, int datasync)
return retval;
}
-static const struct file_operations v9fs_cached_file_operations = {
+static int
+v9fs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int retval;
+
+ retval = generic_file_mmap(file, vma);
+ if (!retval)
+ vma->vm_ops = &v9fs_file_vm_ops;
+
+ return retval;
+}
+
+static int
+v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct v9fs_inode *v9inode;
+ struct page *page = vmf->page;
+ struct file *filp = vma->vm_file;
+ struct inode *inode = filp->f_path.dentry->d_inode;
+
+
+ P9_DPRINTK(P9_DEBUG_VFS, "page %p fid %lx\n",
+ page, (unsigned long)filp->private_data);
+
+ v9inode = V9FS_I(inode);
+ /* make sure the cache has finished storing the page */
+ v9fs_fscache_wait_on_page_write(inode, page);
+ BUG_ON(!v9inode->writeback_fid);
+ lock_page(page);
+ if (page->mapping != inode->i_mapping)
+ goto out_unlock;
+
+ return VM_FAULT_LOCKED;
+out_unlock:
+ unlock_page(page);
+ return VM_FAULT_NOPAGE;
+}
+
+static ssize_t
+v9fs_direct_read(struct file *filp, char __user *udata, size_t count,
+ loff_t *offsetp)
+{
+ loff_t size, offset;
+ struct inode *inode;
+ struct address_space *mapping;
+
+ offset = *offsetp;
+ mapping = filp->f_mapping;
+ inode = mapping->host;
+ if (!count)
+ return 0;
+ size = i_size_read(inode);
+ if (offset < size)
+ filemap_write_and_wait_range(mapping, offset,
+ offset + count - 1);
+
+ return v9fs_file_read(filp, udata, count, offsetp);
+}
+
+/**
+ * v9fs_cached_file_read - read from a file
+ * @filp: file pointer to read
+ * @udata: user data buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+static ssize_t
+v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
+ loff_t *offset)
+{
+ if (filp->f_flags & O_DIRECT)
+ return v9fs_direct_read(filp, data, count, offset);
+ return do_sync_read(filp, data, count, offset);
+}
+
+static ssize_t
+v9fs_direct_write(struct file *filp, const char __user * data,
+ size_t count, loff_t *offsetp)
+{
+ loff_t offset;
+ ssize_t retval;
+ struct inode *inode;
+ struct address_space *mapping;
+
+ offset = *offsetp;
+ mapping = filp->f_mapping;
+ inode = mapping->host;
+ if (!count)
+ return 0;
+
+ mutex_lock(&inode->i_mutex);
+ retval = filemap_write_and_wait_range(mapping, offset,
+ offset + count - 1);
+ if (retval)
+ goto err_out;
+ /*
+ * After a write we want buffered reads to be sure to go to disk to get
+ * the new data. We invalidate clean cached page from the region we're
+ * about to write. We do this *before* the write so that if we fail
+ * here we fall back to buffered write
+ */
+ if (mapping->nrpages) {
+ pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
+ pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
+
+ retval = invalidate_inode_pages2_range(mapping,
+ pg_start, pg_end);
+ /*
+ * If a page can not be invalidated, fall back
+ * to buffered write.
+ */
+ if (retval) {
+ if (retval == -EBUSY)
+ goto buff_write;
+ goto err_out;
+ }
+ }
+ retval = v9fs_file_write(filp, data, count, offsetp);
+err_out:
+ mutex_unlock(&inode->i_mutex);
+ return retval;
+
+buff_write:
+ mutex_unlock(&inode->i_mutex);
+ return do_sync_write(filp, data, count, offsetp);
+}
+
+/**
+ * v9fs_cached_file_write - write to a file
+ * @filp: file pointer to write
+ * @data: data buffer to write data from
+ * @count: size of buffer
+ * @offset: offset at which to write data
+ *
+ */
+static ssize_t
+v9fs_cached_file_write(struct file *filp, const char __user * data,
+ size_t count, loff_t *offset)
+{
+
+ if (filp->f_flags & O_DIRECT)
+ return v9fs_direct_write(filp, data, count, offset);
+ return do_sync_write(filp, data, count, offset);
+}
+
+static const struct vm_operations_struct v9fs_file_vm_ops = {
+ .fault = filemap_fault,
+ .page_mkwrite = v9fs_vm_page_mkwrite,
+};
+
+
+const struct file_operations v9fs_cached_file_operations = {
.llseek = generic_file_llseek,
- .read = do_sync_read,
+ .read = v9fs_cached_file_read,
+ .write = v9fs_cached_file_write,
.aio_read = generic_file_aio_read,
- .write = v9fs_file_write,
+ .aio_write = generic_file_aio_write,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
- .mmap = generic_file_readonly_mmap,
+ .mmap = v9fs_file_mmap,
.fsync = v9fs_file_fsync,
};
-static const struct file_operations v9fs_cached_file_operations_dotl = {
+const struct file_operations v9fs_cached_file_operations_dotl = {
.llseek = generic_file_llseek,
- .read = do_sync_read,
+ .read = v9fs_cached_file_read,
+ .write = v9fs_cached_file_write,
.aio_read = generic_file_aio_read,
- .write = v9fs_file_write,
+ .aio_write = generic_file_aio_write,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock_dotl,
.flock = v9fs_file_flock_dotl,
- .mmap = generic_file_readonly_mmap,
+ .mmap = v9fs_file_mmap,
.fsync = v9fs_file_fsync_dotl,
};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b76a40bdf4c..8a2c232f708 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -203,26 +203,25 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
wstat->extension = NULL;
}
-#ifdef CONFIG_9P_FSCACHE
/**
* v9fs_alloc_inode - helper function to allocate an inode
- * This callback is executed before setting up the inode so that we
- * can associate a vcookie with each inode.
*
*/
-
struct inode *v9fs_alloc_inode(struct super_block *sb)
{
- struct v9fs_cookie *vcookie;
- vcookie = (struct v9fs_cookie *)kmem_cache_alloc(vcookie_cache,
- GFP_KERNEL);
- if (!vcookie)
+ struct v9fs_inode *v9inode;
+ v9inode = (struct v9fs_inode *)kmem_cache_alloc(v9fs_inode_cache,
+ GFP_KERNEL);
+ if (!v9inode)
return NULL;
-
- vcookie->fscache = NULL;
- vcookie->qid = NULL;
- spin_lock_init(&vcookie->lock);
- return &vcookie->inode;
+#ifdef CONFIG_9P_FSCACHE
+ v9inode->fscache = NULL;
+ v9inode->fscache_key = NULL;
+ spin_lock_init(&v9inode->fscache_lock);
+#endif
+ v9inode->writeback_fid = NULL;
+ v9inode->cache_validity = 0;
+ return &v9inode->vfs_inode;
}
/**
@@ -234,35 +233,18 @@ static void v9fs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
INIT_LIST_HEAD(&inode->i_dentry);
- kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
+ kmem_cache_free(v9fs_inode_cache, V9FS_I(inode));
}
void v9fs_destroy_inode(struct inode *inode)
{
call_rcu(&inode->i_rcu, v9fs_i_callback);
}
-#endif
-/**
- * v9fs_get_inode - helper function to setup an inode
- * @sb: superblock
- * @mode: mode to setup inode with
- *
- */
-
-struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+int v9fs_init_inode(struct v9fs_session_info *v9ses,
+ struct inode *inode, int mode)
{
- int err;
- struct inode *inode;
- struct v9fs_session_info *v9ses = sb->s_fs_info;
-
- P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
-
- inode = new_inode(sb);
- if (!inode) {
- P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
- return ERR_PTR(-ENOMEM);
- }
+ int err = 0;
inode_init_owner(inode, NULL, mode);
inode->i_blocks = 0;
@@ -292,14 +274,20 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
case S_IFREG:
if (v9fs_proto_dotl(v9ses)) {
inode->i_op = &v9fs_file_inode_operations_dotl;
- inode->i_fop = &v9fs_file_operations_dotl;
+ if (v9ses->cache)
+ inode->i_fop =
+ &v9fs_cached_file_operations_dotl;
+ else
+ inode->i_fop = &v9fs_file_operations_dotl;
} else {
inode->i_op = &v9fs_file_inode_operations;
- inode->i_fop = &v9fs_file_operations;
+ if (v9ses->cache)
+ inode->i_fop = &v9fs_cached_file_operations;
+ else
+ inode->i_fop = &v9fs_file_operations;
}
break;
-
case S_IFLNK:
if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) {
P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with "
@@ -335,12 +323,37 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
err = -EINVAL;
goto error;
}
+error:
+ return err;
- return inode;
+}
-error:
- iput(inode);
- return ERR_PTR(err);
+/**
+ * v9fs_get_inode - helper function to setup an inode
+ * @sb: superblock
+ * @mode: mode to setup inode with
+ *
+ */
+
+struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+{
+ int err;
+ struct inode *inode;
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
+
+ inode = new_inode(sb);
+ if (!inode) {
+ P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ err = v9fs_init_inode(v9ses, inode, mode);
+ if (err) {
+ iput(inode);
+ return ERR_PTR(err);
+ }
+ return inode;
}
/*
@@ -403,6 +416,8 @@ error:
*/
void v9fs_evict_inode(struct inode *inode)
{
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+
truncate_inode_pages(inode->i_mapping, 0);
end_writeback(inode);
filemap_fdatawrite(inode->i_mapping);
@@ -410,41 +425,67 @@ void v9fs_evict_inode(struct inode *inode)
#ifdef CONFIG_9P_FSCACHE
v9fs_cache_inode_put_cookie(inode);
#endif
+ /* clunk the fid stashed in writeback_fid */
+ if (v9inode->writeback_fid) {
+ p9_client_clunk(v9inode->writeback_fid);
+ v9inode->writeback_fid = NULL;
+ }
}
-struct inode *
-v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+static struct inode *v9fs_qid_iget(struct super_block *sb,
+ struct p9_qid *qid,
+ struct p9_wstat *st)
{
- int err, umode;
- struct inode *ret = NULL;
- struct p9_wstat *st;
-
- st = p9_client_stat(fid);
- if (IS_ERR(st))
- return ERR_CAST(st);
+ int retval, umode;
+ unsigned long i_ino;
+ struct inode *inode;
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+ i_ino = v9fs_qid2ino(qid);
+ inode = iget_locked(sb, i_ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->i_state & I_NEW))
+ return inode;
+ /*
+ * initialize the inode with the stat info
+ * FIXME!! we may need support for stale inodes
+ * later.
+ */
umode = p9mode2unixmode(v9ses, st->mode);
- ret = v9fs_get_inode(sb, umode);
- if (IS_ERR(ret)) {
- err = PTR_ERR(ret);
+ retval = v9fs_init_inode(v9ses, inode, umode);
+ if (retval)
goto error;
- }
-
- v9fs_stat2inode(st, ret, sb);
- ret->i_ino = v9fs_qid2ino(&st->qid);
+ v9fs_stat2inode(st, inode, sb);
#ifdef CONFIG_9P_FSCACHE
- v9fs_vcookie_set_qid(ret, &st->qid);
- v9fs_cache_inode_get_cookie(ret);
+ v9fs_fscache_set_key(inode, &st->qid);
+ v9fs_cache_inode_get_cookie(inode);
#endif
- p9stat_free(st);
- kfree(st);
- return ret;
+ unlock_new_inode(inode);
+ return inode;
error:
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(retval);
+
+}
+
+struct inode *
+v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ struct p9_wstat *st;
+ struct inode *inode = NULL;
+
+ st = p9_client_stat(fid);
+ if (IS_ERR(st))
+ return ERR_CAST(st);
+
+ inode = v9fs_qid_iget(sb, &st->qid, st);
p9stat_free(st);
kfree(st);
- return ERR_PTR(err);
+ return inode;
}
/**
@@ -458,8 +499,8 @@ error:
static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
{
int retval;
- struct inode *file_inode;
struct p9_fid *v9fid;
+ struct inode *file_inode;
P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
rmdir);
@@ -470,8 +511,20 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
return PTR_ERR(v9fid);
retval = p9_client_remove(v9fid);
- if (!retval)
- drop_nlink(file_inode);
+ if (!retval) {
+ /*
+ * directories on unlink should have zero
+ * link count
+ */
+ if (rmdir) {
+ clear_nlink(file_inode);
+ drop_nlink(dir);
+ } else
+ drop_nlink(file_inode);
+
+ v9fs_invalidate_inode_attr(file_inode);
+ v9fs_invalidate_inode_attr(dir);
+ }
return retval;
}
@@ -531,7 +584,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
}
/* instantiate inode and assign the unopened fid to the dentry */
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -570,9 +623,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
int err;
u32 perm;
int flags;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid;
struct file *filp;
+ struct v9fs_inode *v9inode;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid, *inode_fid;
err = 0;
fid = NULL;
@@ -592,8 +646,25 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
goto error;
}
+ v9fs_invalidate_inode_attr(dir);
/* if we are opening a file, assign the open fid to the file */
if (nd && nd->flags & LOOKUP_OPEN) {
+ v9inode = V9FS_I(dentry->d_inode);
+ if (v9ses->cache && !v9inode->writeback_fid) {
+ /*
+ * clone a fid and add it to writeback_fid
+ * we do it during open time instead of
+ * page dirty time via write_begin/page_mkwrite
+ * because we want write after unlink usecase
+ * to work.
+ */
+ inode_fid = v9fs_writeback_fid(dentry);
+ if (IS_ERR(inode_fid)) {
+ err = PTR_ERR(inode_fid);
+ goto error;
+ }
+ v9inode->writeback_fid = (void *) inode_fid;
+ }
filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
if (IS_ERR(filp)) {
err = PTR_ERR(filp);
@@ -601,6 +672,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
}
filp->private_data = fid;
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache)
+ v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
+#endif
} else
p9_client_clunk(fid);
@@ -625,8 +700,8 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
int err;
u32 perm;
- struct v9fs_session_info *v9ses;
struct p9_fid *fid;
+ struct v9fs_session_info *v9ses;
P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
err = 0;
@@ -636,6 +711,9 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
fid = NULL;
+ } else {
+ inc_nlink(dir);
+ v9fs_invalidate_inode_attr(dir);
}
if (fid)
@@ -687,7 +765,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(result);
}
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
result = PTR_ERR(inode);
inode = NULL;
@@ -747,17 +825,19 @@ int
v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
+ int retval;
struct inode *old_inode;
+ struct inode *new_inode;
struct v9fs_session_info *v9ses;
struct p9_fid *oldfid;
struct p9_fid *olddirfid;
struct p9_fid *newdirfid;
struct p9_wstat wstat;
- int retval;
P9_DPRINTK(P9_DEBUG_VFS, "\n");
retval = 0;
old_inode = old_dentry->d_inode;
+ new_inode = new_dentry->d_inode;
v9ses = v9fs_inode2v9ses(old_inode);
oldfid = v9fs_fid_lookup(old_dentry);
if (IS_ERR(oldfid))
@@ -798,9 +878,30 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
retval = p9_client_wstat(oldfid, &wstat);
clunk_newdir:
- if (!retval)
+ if (!retval) {
+ if (new_inode) {
+ if (S_ISDIR(new_inode->i_mode))
+ clear_nlink(new_inode);
+ else
+ drop_nlink(new_inode);
+ /*
+ * Work around vfs rename rehash bug with
+ * FS_RENAME_DOES_D_MOVE
+ */
+ v9fs_invalidate_inode_attr(new_inode);
+ }
+ if (S_ISDIR(old_inode->i_mode)) {
+ if (!new_inode)
+ inc_nlink(new_dir);
+ drop_nlink(old_dir);
+ }
+ v9fs_invalidate_inode_attr(old_inode);
+ v9fs_invalidate_inode_attr(old_dir);
+ v9fs_invalidate_inode_attr(new_dir);
+
/* successful rename */
d_move(old_dentry, new_dentry);
+ }
up_write(&v9ses->rename_sem);
p9_client_clunk(newdirfid);
@@ -831,9 +932,10 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
err = -EPERM;
v9ses = v9fs_inode2v9ses(dentry->d_inode);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
- return simple_getattr(mnt, dentry, stat);
-
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ generic_fillattr(dentry->d_inode, stat);
+ return 0;
+ }
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return PTR_ERR(fid);
@@ -891,17 +993,20 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
if (iattr->ia_valid & ATTR_GID)
wstat.n_gid = iattr->ia_gid;
}
-
- retval = p9_client_wstat(fid, &wstat);
- if (retval < 0)
- return retval;
-
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(dentry->d_inode)) {
retval = vmtruncate(dentry->d_inode, iattr->ia_size);
if (retval)
return retval;
}
+ /* Write all dirty data */
+ if (S_ISREG(dentry->d_inode->i_mode))
+ filemap_write_and_wait(dentry->d_inode->i_mapping);
+
+ retval = p9_client_wstat(fid, &wstat);
+ if (retval < 0)
+ return retval;
+ v9fs_invalidate_inode_attr(dentry->d_inode);
setattr_copy(dentry->d_inode, iattr);
mark_inode_dirty(dentry->d_inode);
@@ -924,6 +1029,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
char tag_name[14];
unsigned int i_nlink;
struct v9fs_session_info *v9ses = sb->s_fs_info;
+ struct v9fs_inode *v9inode = V9FS_I(inode);
inode->i_nlink = 1;
@@ -983,6 +1089,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
/* not real number of blocks, but 512 byte ones ... */
inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
+ v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
}
/**
@@ -1115,8 +1222,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
int mode, const char *extension)
{
u32 perm;
- struct v9fs_session_info *v9ses;
struct p9_fid *fid;
+ struct v9fs_session_info *v9ses;
v9ses = v9fs_inode2v9ses(dir);
if (!v9fs_proto_dotu(v9ses)) {
@@ -1130,6 +1237,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
if (IS_ERR(fid))
return PTR_ERR(fid);
+ v9fs_invalidate_inode_attr(dir);
p9_client_clunk(fid);
return 0;
}
@@ -1166,8 +1274,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
int retval;
- struct p9_fid *oldfid;
char *name;
+ struct p9_fid *oldfid;
P9_DPRINTK(P9_DEBUG_VFS,
" %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
@@ -1186,7 +1294,10 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
sprintf(name, "%d\n", oldfid->fid);
retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
__putname(name);
-
+ if (!retval) {
+ v9fs_refresh_inode(oldfid, old_dentry->d_inode);
+ v9fs_invalidate_inode_attr(dir);
+ }
clunk_fid:
p9_client_clunk(oldfid);
return retval;
@@ -1237,6 +1348,32 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
return retval;
}
+int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
+{
+ loff_t i_size;
+ struct p9_wstat *st;
+ struct v9fs_session_info *v9ses;
+
+ v9ses = v9fs_inode2v9ses(inode);
+ st = p9_client_stat(fid);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ spin_lock(&inode->i_lock);
+ /*
+ * We don't want to refresh inode->i_size,
+ * because we may have cached data
+ */
+ i_size = inode->i_size;
+ v9fs_stat2inode(st, inode, inode->i_sb);
+ if (v9ses->cache)
+ inode->i_size = i_size;
+ spin_unlock(&inode->i_lock);
+ p9stat_free(st);
+ kfree(st);
+ return 0;
+}
+
static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index fe3ffa9aace..67c138e94fe 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -86,40 +86,63 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
return dentry;
}
+static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
+ struct p9_qid *qid,
+ struct p9_fid *fid,
+ struct p9_stat_dotl *st)
+{
+ int retval;
+ unsigned long i_ino;
+ struct inode *inode;
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+ i_ino = v9fs_qid2ino(qid);
+ inode = iget_locked(sb, i_ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->i_state & I_NEW))
+ return inode;
+ /*
+ * initialize the inode with the stat info
+ * FIXME!! we may need support for stale inodes
+ * later.
+ */
+ retval = v9fs_init_inode(v9ses, inode, st->st_mode);
+ if (retval)
+ goto error;
+
+ v9fs_stat2inode_dotl(st, inode);
+#ifdef CONFIG_9P_FSCACHE
+ v9fs_fscache_set_key(inode, &st->qid);
+ v9fs_cache_inode_get_cookie(inode);
+#endif
+ retval = v9fs_get_acl(inode, fid);
+ if (retval)
+ goto error;
+
+ unlock_new_inode(inode);
+ return inode;
+error:
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(retval);
+
+}
+
struct inode *
-v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
{
- struct inode *ret = NULL;
- int err;
struct p9_stat_dotl *st;
+ struct inode *inode = NULL;
st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
if (IS_ERR(st))
return ERR_CAST(st);
- ret = v9fs_get_inode(sb, st->st_mode);
- if (IS_ERR(ret)) {
- err = PTR_ERR(ret);
- goto error;
- }
-
- v9fs_stat2inode_dotl(st, ret);
- ret->i_ino = v9fs_qid2ino(&st->qid);
-#ifdef CONFIG_9P_FSCACHE
- v9fs_vcookie_set_qid(ret, &st->qid);
- v9fs_cache_inode_get_cookie(ret);
-#endif
- err = v9fs_get_acl(ret, fid);
- if (err) {
- iput(ret);
- goto error;
- }
- kfree(st);
- return ret;
-error:
+ inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st);
kfree(st);
- return ERR_PTR(err);
+ return inode;
}
/**
@@ -136,16 +159,17 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
struct nameidata *nd)
{
int err = 0;
- char *name = NULL;
gid_t gid;
int flags;
mode_t mode;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid = NULL;
- struct p9_fid *dfid, *ofid;
+ char *name = NULL;
struct file *filp;
struct p9_qid qid;
struct inode *inode;
+ struct p9_fid *fid = NULL;
+ struct v9fs_inode *v9inode;
+ struct p9_fid *dfid, *ofid, *inode_fid;
+ struct v9fs_session_info *v9ses;
struct posix_acl *pacl = NULL, *dacl = NULL;
v9ses = v9fs_inode2v9ses(dir);
@@ -196,6 +220,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
err);
goto error;
}
+ v9fs_invalidate_inode_attr(dir);
/* instantiate inode and assign the unopened fid to the dentry */
fid = p9_client_walk(dfid, 1, &name, 1);
@@ -205,7 +230,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
fid = NULL;
goto error;
}
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -219,6 +244,22 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
/* Now set the ACL based on the default value */
v9fs_set_create_acl(dentry, dacl, pacl);
+ v9inode = V9FS_I(inode);
+ if (v9ses->cache && !v9inode->writeback_fid) {
+ /*
+ * clone a fid and add it to writeback_fid
+ * we do it during open time instead of
+ * page dirty time via write_begin/page_mkwrite
+ * because we want write after unlink usecase
+ * to work.
+ */
+ inode_fid = v9fs_writeback_fid(dentry);
+ if (IS_ERR(inode_fid)) {
+ err = PTR_ERR(inode_fid);
+ goto error;
+ }
+ v9inode->writeback_fid = (void *) inode_fid;
+ }
/* Since we are opening a file, assign the open fid to the file */
filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
if (IS_ERR(filp)) {
@@ -226,6 +267,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
return PTR_ERR(filp);
}
filp->private_data = ofid;
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache)
+ v9fs_cache_inode_set_cookie(inode, filp);
+#endif
return 0;
error:
@@ -300,7 +345,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
goto error;
}
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -327,7 +372,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
}
/* Now set the ACL based on the default value */
v9fs_set_create_acl(dentry, dacl, pacl);
-
+ inc_nlink(dir);
+ v9fs_invalidate_inode_attr(dir);
error:
if (fid)
p9_client_clunk(fid);
@@ -346,9 +392,10 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
err = -EPERM;
v9ses = v9fs_inode2v9ses(dentry->d_inode);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
- return simple_getattr(mnt, dentry, stat);
-
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ generic_fillattr(dentry->d_inode, stat);
+ return 0;
+ }
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return PTR_ERR(fid);
@@ -406,16 +453,20 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
if (IS_ERR(fid))
return PTR_ERR(fid);
- retval = p9_client_setattr(fid, &p9attr);
- if (retval < 0)
- return retval;
-
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(dentry->d_inode)) {
retval = vmtruncate(dentry->d_inode, iattr->ia_size);
if (retval)
return retval;
}
+ /* Write all dirty data */
+ if (S_ISREG(dentry->d_inode->i_mode))
+ filemap_write_and_wait(dentry->d_inode->i_mapping);
+
+ retval = p9_client_setattr(fid, &p9attr);
+ if (retval < 0)
+ return retval;
+ v9fs_invalidate_inode_attr(dentry->d_inode);
setattr_copy(dentry->d_inode, iattr);
mark_inode_dirty(dentry->d_inode);
@@ -439,6 +490,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
void
v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
{
+ struct v9fs_inode *v9inode = V9FS_I(inode);
if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
inode->i_atime.tv_sec = stat->st_atime_sec;
@@ -497,20 +549,21 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
/* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
* because the inode structure does not have fields for them.
*/
+ v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
}
static int
v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
const char *symname)
{
- struct v9fs_session_info *v9ses;
- struct p9_fid *dfid;
- struct p9_fid *fid = NULL;
- struct inode *inode;
- struct p9_qid qid;
- char *name;
int err;
gid_t gid;
+ char *name;
+ struct p9_qid qid;
+ struct inode *inode;
+ struct p9_fid *dfid;
+ struct p9_fid *fid = NULL;
+ struct v9fs_session_info *v9ses;
name = (char *) dentry->d_name.name;
P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
@@ -534,6 +587,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
goto error;
}
+ v9fs_invalidate_inode_attr(dir);
if (v9ses->cache) {
/* Now walk from the parent so we can get an unopened fid. */
fid = p9_client_walk(dfid, 1, &name, 1);
@@ -546,7 +600,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
}
/* instantiate inode and assign the unopened fid to dentry */
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -588,10 +642,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
int err;
- struct p9_fid *dfid, *oldfid;
char *name;
- struct v9fs_session_info *v9ses;
struct dentry *dir_dentry;
+ struct p9_fid *dfid, *oldfid;
+ struct v9fs_session_info *v9ses;
P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
dir->i_ino, old_dentry->d_name.name,
@@ -616,29 +670,17 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
return err;
}
+ v9fs_invalidate_inode_attr(dir);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
/* Get the latest stat info from server. */
struct p9_fid *fid;
- struct p9_stat_dotl *st;
-
fid = v9fs_fid_lookup(old_dentry);
if (IS_ERR(fid))
return PTR_ERR(fid);
- st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
- if (IS_ERR(st))
- return PTR_ERR(st);
-
- v9fs_stat2inode_dotl(st, old_dentry->d_inode);
-
- kfree(st);
- } else {
- /* Caching disabled. No need to get upto date stat info.
- * This dentry will be released immediately. So, just hold the
- * inode
- */
- ihold(old_dentry->d_inode);
+ v9fs_refresh_inode_dotl(fid, old_dentry->d_inode);
}
+ ihold(old_dentry->d_inode);
d_instantiate(dentry, old_dentry->d_inode);
return err;
@@ -657,12 +699,12 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
dev_t rdev)
{
int err;
+ gid_t gid;
char *name;
mode_t mode;
struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL, *dfid = NULL;
struct inode *inode;
- gid_t gid;
struct p9_qid qid;
struct dentry *dir_dentry;
struct posix_acl *dacl = NULL, *pacl = NULL;
@@ -699,6 +741,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
if (err < 0)
goto error;
+ v9fs_invalidate_inode_attr(dir);
/* instantiate inode and assign the unopened fid to the dentry */
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
fid = p9_client_walk(dfid, 1, &name, 1);
@@ -710,7 +753,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
goto error;
}
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -782,6 +825,31 @@ ndset:
return NULL;
}
+int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
+{
+ loff_t i_size;
+ struct p9_stat_dotl *st;
+ struct v9fs_session_info *v9ses;
+
+ v9ses = v9fs_inode2v9ses(inode);
+ st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ spin_lock(&inode->i_lock);
+ /*
+ * We don't want to refresh inode->i_size,
+ * because we may have cached data
+ */
+ i_size = inode->i_size;
+ v9fs_stat2inode_dotl(st, inode);
+ if (v9ses->cache)
+ inode->i_size = i_size;
+ spin_unlock(&inode->i_lock);
+ kfree(st);
+ return 0;
+}
+
const struct inode_operations v9fs_dir_inode_operations_dotl = {
.create = v9fs_vfs_create_dotl,
.lookup = v9fs_vfs_lookup,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index dbaabe3b813..09fd08d1606 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -86,12 +86,15 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
} else
sb->s_op = &v9fs_super_ops;
sb->s_bdi = &v9ses->bdi;
+ if (v9ses->cache)
+ sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE;
- sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
- MS_NOATIME;
+ sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
+ if (!v9ses->cache)
+ sb->s_flags |= MS_SYNCHRONOUS;
#ifdef CONFIG_9P_FS_POSIX_ACL
- if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)
+ if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL)
sb->s_flags |= MS_POSIXACL;
#endif
@@ -151,7 +154,6 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
retval = PTR_ERR(inode);
goto release_sb;
}
-
root = d_alloc_root(inode);
if (!root) {
iput(inode);
@@ -166,7 +168,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
retval = PTR_ERR(st);
goto release_sb;
}
-
+ root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
v9fs_stat2inode_dotl(st, root->d_inode);
kfree(st);
} else {
@@ -183,10 +185,21 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
p9stat_free(st);
kfree(st);
}
+ v9fs_fid_add(root, fid);
retval = v9fs_get_acl(inode, fid);
if (retval)
goto release_sb;
- v9fs_fid_add(root, fid);
+ /*
+ * Add the root fid to session info. This is used
+ * for file system sync. We want a cloned fid here
+ * so that we can do a sync_filesystem after a
+ * shrink_dcache_for_umount
+ */
+ v9ses->root_fid = v9fs_fid_clone(root);
+ if (IS_ERR(v9ses->root_fid)) {
+ retval = PTR_ERR(v9ses->root_fid);
+ goto release_sb;
+ }
P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
return dget(sb->s_root);
@@ -197,15 +210,11 @@ close_session:
v9fs_session_close(v9ses);
kfree(v9ses);
return ERR_PTR(retval);
-
release_sb:
/*
- * we will do the session_close and root dentry release
- * in the below call. But we need to clunk fid, because we haven't
- * attached the fid to dentry so it won't get clunked
- * automatically.
+ * we will do the session_close and root dentry
+ * release in the below call.
*/
- p9_client_clunk(fid);
deactivate_locked_super(sb);
return ERR_PTR(retval);
}
@@ -223,7 +232,7 @@ static void v9fs_kill_super(struct super_block *s)
P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
kill_anon_super(s);
-
+ p9_client_clunk(v9ses->root_fid);
v9fs_session_cancel(v9ses);
v9fs_session_close(v9ses);
kfree(v9ses);
@@ -276,11 +285,31 @@ done:
return res;
}
+static int v9fs_sync_fs(struct super_block *sb, int wait)
+{
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb);
+ return p9_client_sync_fs(v9ses->root_fid);
+}
+
+static int v9fs_drop_inode(struct inode *inode)
+{
+ struct v9fs_session_info *v9ses;
+ v9ses = v9fs_inode2v9ses(inode);
+ if (v9ses->cache)
+ return generic_drop_inode(inode);
+ /*
+ * in case of non cached mode always drop the
+ * the inode because we want the inode attribute
+ * to always match that on the server.
+ */
+ return 1;
+}
+
static const struct super_operations v9fs_super_ops = {
-#ifdef CONFIG_9P_FSCACHE
.alloc_inode = v9fs_alloc_inode,
.destroy_inode = v9fs_destroy_inode,
-#endif
.statfs = simple_statfs,
.evict_inode = v9fs_evict_inode,
.show_options = generic_show_options,
@@ -288,11 +317,11 @@ static const struct super_operations v9fs_super_ops = {
};
static const struct super_operations v9fs_super_ops_dotl = {
-#ifdef CONFIG_9P_FSCACHE
.alloc_inode = v9fs_alloc_inode,
.destroy_inode = v9fs_destroy_inode,
-#endif
+ .sync_fs = v9fs_sync_fs,
.statfs = v9fs_statfs,
+ .drop_inode = v9fs_drop_inode,
.evict_inode = v9fs_evict_inode,
.show_options = generic_show_options,
.umount_begin = v9fs_umount_begin,
@@ -303,5 +332,5 @@ struct file_system_type v9fs_fs_type = {
.mount = v9fs_mount,
.kill_sb = v9fs_kill_super,
.owner = THIS_MODULE,
- .fs_flags = FS_RENAME_DOES_D_MOVE,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT,
};
diff --git a/fs/Kconfig b/fs/Kconfig
index 3db9caa57ed..7cb53aafac1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
def_bool n
config EXPORTFS
- tristate
+ bool
config FILE_LOCKING
bool "Enable POSIX file locking API" if EXPERT
diff --git a/fs/Makefile b/fs/Makefile
index a7f7cef0c0c..ba01202844c 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
+obj-$(CONFIG_FHANDLE) += fhandle.o
+
obj-y += quota/
obj-$(CONFIG_PROC_FS) += proc/
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 15690bb1d3b..789b3afb342 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -140,6 +140,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
candidate->first = candidate->last = index;
candidate->offset_first = from;
candidate->to_last = to;
+ INIT_LIST_HEAD(&candidate->link);
candidate->usage = 1;
candidate->state = AFS_WBACK_PENDING;
init_waitqueue_head(&candidate->waitq);
diff --git a/fs/aio.c b/fs/aio.c
index fc557a3be0a..7f54f43b8f7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -85,7 +85,7 @@ static int __init aio_setup(void)
kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
- aio_wq = create_workqueue("aio");
+ aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */
abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
BUG_ON(!aio_wq || !abe_pool);
@@ -239,15 +239,23 @@ static void __put_ioctx(struct kioctx *ctx)
call_rcu(&ctx->rcu_head, ctx_rcu_free);
}
-#define get_ioctx(kioctx) do { \
- BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
- atomic_inc(&(kioctx)->users); \
-} while (0)
-#define put_ioctx(kioctx) do { \
- BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
- if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \
- __put_ioctx(kioctx); \
-} while (0)
+static inline void get_ioctx(struct kioctx *kioctx)
+{
+ BUG_ON(atomic_read(&kioctx->users) <= 0);
+ atomic_inc(&kioctx->users);
+}
+
+static inline int try_get_ioctx(struct kioctx *kioctx)
+{
+ return atomic_inc_not_zero(&kioctx->users);
+}
+
+static inline void put_ioctx(struct kioctx *kioctx)
+{
+ BUG_ON(atomic_read(&kioctx->users) <= 0);
+ if (unlikely(atomic_dec_and_test(&kioctx->users)))
+ __put_ioctx(kioctx);
+}
/* ioctx_alloc
* Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
@@ -569,7 +577,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
spin_unlock(&fput_lock);
- queue_work(aio_wq, &fput_work);
+ schedule_work(&fput_work);
} else {
req->ki_filp = NULL;
really_put_req(ctx, req);
@@ -601,8 +609,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
rcu_read_lock();
hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
- if (ctx->user_id == ctx_id && !ctx->dead) {
- get_ioctx(ctx);
+ /*
+ * RCU protects us against accessing freed memory but
+ * we have to be careful not to get a reference when the
+ * reference count already dropped to 0 (ctx->dead test
+ * is unreliable because of races).
+ */
+ if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
ret = ctx;
break;
}
@@ -1629,6 +1642,23 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
spin_lock_irq(&ctx->ctx_lock);
+ /*
+ * We could have raced with io_destroy() and are currently holding a
+ * reference to ctx which should be destroyed. We cannot submit IO
+ * since ctx gets freed as soon as io_submit() puts its reference. The
+ * check here is reliable: io_destroy() sets ctx->dead before waiting
+ * for outstanding IO and the barrier between these two is realized by
+ * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we
+ * increment ctx->reqs_active before checking for ctx->dead and the
+ * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
+ * don't see ctx->dead set here, io_destroy() waits for our IO to
+ * finish.
+ */
+ if (ctx->dead) {
+ spin_unlock_irq(&ctx->ctx_lock);
+ ret = -EINVAL;
+ goto out_put_req;
+ }
aio_run_iocb(req);
if (!list_empty(&ctx->run_list)) {
/* drain the run list */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4fb8a343153..88928701959 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
if (ret)
goto out_del;
+ /*
+ * bdev could be deleted beneath us which would implicitly destroy
+ * the holder directory. Hold on to it.
+ */
+ kobject_get(bdev->bd_part->holder_dir);
list_add(&holder->list, &bdev->bd_holder_disks);
goto out_unlock;
@@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
del_symlink(bdev->bd_part->holder_dir,
&disk_to_dev(disk)->kobj);
+ kobject_put(bdev->bd_part->holder_dir);
list_del_init(&holder->list);
kfree(holder);
}
@@ -922,14 +928,15 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
* flush_disk - invalidates all buffer-cache entries on a disk
*
* @bdev: struct block device to be flushed
+ * @kill_dirty: flag to guide handling of dirty inodes
*
* Invalidates all buffer-cache entries on a disk. It should be called
* when a disk has been changed -- either by a media change or online
* resize.
*/
-static void flush_disk(struct block_device *bdev)
+static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
- if (__invalidate_device(bdev)) {
+ if (__invalidate_device(bdev, kill_dirty)) {
char name[BDEVNAME_SIZE] = "";
if (bdev->bd_disk)
@@ -966,7 +973,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
"%s: detected capacity change from %lld to %lld\n",
name, bdev_size, disk_size);
i_size_write(bdev->bd_inode, disk_size);
- flush_disk(bdev);
+ flush_disk(bdev, false);
}
}
EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1026,7 @@ int check_disk_change(struct block_device *bdev)
if (!(events & DISK_EVENT_MEDIA_CHANGE))
return 0;
- flush_disk(bdev);
+ flush_disk(bdev, true);
if (bdops->revalidate_disk)
bdops->revalidate_disk(bdev->bd_disk);
return 1;
@@ -1600,7 +1607,7 @@ fail:
}
EXPORT_SYMBOL(lookup_bdev);
-int __invalidate_device(struct block_device *bdev)
+int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
struct super_block *sb = get_super(bdev);
int res = 0;
@@ -1613,7 +1620,7 @@ int __invalidate_device(struct block_device *bdev)
* hold).
*/
shrink_dcache_sb(sb);
- res = invalidate_inodes(sb);
+ res = invalidate_inodes(sb, kill_dirty);
drop_super(sb);
}
invalidate_bdev(bdev);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af605..7f78cc78fdd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
u64 disk_total; /* total bytes on disk, takes mirrors into
account */
+ /*
+ * we bump reservation progress every time we decrement
+ * bytes_reserved. This way people waiting for reservations
+ * know something good has happened and they can check
+ * for progress. The number here isn't to be trusted, it
+ * just shows reclaim activity
+ */
+ unsigned long reservation_progress;
+
int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
@@ -1254,6 +1263,7 @@ struct btrfs_root {
#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
+#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2228,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
u64 start, u64 end);
int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
u64 num_bytes);
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 type);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ff27d7a477b..b4ffad859ad 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
int len = *max_len;
int type;
- if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) ||
- (connectable && len < BTRFS_FID_SIZE_CONNECTABLE))
+ if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
+ *max_len = BTRFS_FID_SIZE_CONNECTABLE;
return 255;
+ } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
+ *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
+ return 255;
+ }
len = BTRFS_FID_SIZE_NON_CONNECTABLE;
type = FILEID_BTRFS_WITHOUT_PARENT;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f3c96fc0143..7b3089b5c2d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
u64 max_reclaim;
u64 reclaimed = 0;
long time_left;
- int pause = 1;
int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0;
+ unsigned long progress;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
smp_mb();
reserved = space_info->bytes_reserved;
+ progress = space_info->reservation_progress;
if (reserved == 0)
return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
spin_lock(&space_info->lock);
- if (reserved > space_info->bytes_reserved) {
- loops = 0;
+ if (reserved > space_info->bytes_reserved)
reclaimed += reserved - space_info->bytes_reserved;
- } else {
- loops++;
- }
reserved = space_info->bytes_reserved;
spin_unlock(&space_info->lock);
+ loops++;
+
if (reserved == 0 || reclaimed >= max_reclaim)
break;
if (trans && trans->transaction->blocked)
return -EAGAIN;
- __set_current_state(TASK_INTERRUPTIBLE);
- time_left = schedule_timeout(pause);
+ time_left = schedule_timeout_interruptible(1);
/* We were interrupted, exit */
if (time_left)
break;
- pause <<= 1;
- if (pause > HZ / 10)
- pause = HZ / 10;
+ /* we've kicked the IO a few times, if anything has been freed,
+ * exit. There is no sense in looping here for a long time
+ * when we really need to commit the transaction, or there are
+ * just too many writers without enough free space
+ */
+
+ if (loops > 3) {
+ smp_mb();
+ if (progress != space_info->reservation_progress)
+ break;
+ }
}
return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
if (num_bytes) {
spin_lock(&space_info->lock);
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
}
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_reserved -= num_bytes;
+ sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
}
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
to_reserve = 0;
}
spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
to_reserve += calc_csum_metadata_size(inode, num_bytes);
ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
btrfs_set_block_group_used(&cache->item, old_val);
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
cache->space_info->bytes_used += num_bytes;
cache->space_info->disk_used += num_bytes * factor;
spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
if (reserved) {
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (ret) {
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_reserved -= buf->len;
+ cache->space_info->reservation_progress++;
spin_unlock(&cache->space_info->lock);
}
goto out;
@@ -5376,7 +5387,7 @@ again:
num_bytes, data, 1);
goto again;
}
- if (ret == -ENOSPC) {
+ if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
struct btrfs_space_info *sinfo;
sinfo = __find_space_info(root->fs_info, data);
@@ -8065,6 +8076,13 @@ out:
return ret;
}
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 type)
+{
+ u64 alloc_flags = get_alloc_profile(root, type);
+ return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+}
+
/*
* helper to account the unused space of all the readonly block group in the
* list. takes mirrors into account.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 92ac5192c51..714adc4ac4c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
*/
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end, u64 max_bytes,
- unsigned long bits)
+ unsigned long bits, int contig)
{
struct rb_node *node;
struct extent_state *state;
u64 cur_start = *start;
u64 total_bytes = 0;
+ u64 last = 0;
int found = 0;
if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
state = rb_entry(node, struct extent_state, rb_node);
if (state->start > search_end)
break;
- if (state->end >= cur_start && (state->state & bits)) {
+ if (contig && found && state->start > last + 1)
+ break;
+ if (state->end >= cur_start && (state->state & bits) == bits) {
total_bytes += min(search_end, state->end) + 1 -
max(cur_start, state->start);
if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
*start = state->start;
found = 1;
}
+ last = state->end;
+ } else if (contig && found) {
+ break;
}
node = rb_next(node);
if (!node)
@@ -2912,6 +2918,46 @@ out:
return sector;
}
+/*
+ * helper function for fiemap, which doesn't want to see any holes.
+ * This maps until we find something past 'last'
+ */
+static struct extent_map *get_extent_skip_holes(struct inode *inode,
+ u64 offset,
+ u64 last,
+ get_extent_t *get_extent)
+{
+ u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
+ struct extent_map *em;
+ u64 len;
+
+ if (offset >= last)
+ return NULL;
+
+ while(1) {
+ len = last - offset;
+ if (len == 0)
+ break;
+ len = (len + sectorsize - 1) & ~(sectorsize - 1);
+ em = get_extent(inode, NULL, 0, offset, len, 0);
+ if (!em || IS_ERR(em))
+ return em;
+
+ /* if this isn't a hole return it */
+ if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
+ em->block_start != EXTENT_MAP_HOLE) {
+ return em;
+ }
+
+ /* this is a hole, advance to the next extent */
+ offset = extent_map_end(em);
+ free_extent_map(em);
+ if (offset >= last)
+ break;
+ }
+ return NULL;
+}
+
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
@@ -2921,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u32 flags = 0;
u32 found_type;
u64 last;
+ u64 last_for_get_extent = 0;
u64 disko = 0;
+ u64 isize = i_size_read(inode);
struct btrfs_key found_key;
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
struct btrfs_file_extent_item *item;
int end = 0;
- u64 em_start = 0, em_len = 0;
+ u64 em_start = 0;
+ u64 em_len = 0;
+ u64 em_end = 0;
unsigned long emflags;
- int hole = 0;
if (len == 0)
return -EINVAL;
@@ -2940,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return -ENOMEM;
path->leave_spinning = 1;
+ /*
+ * lookup the last file extent. We're not using i_size here
+ * because there might be preallocation past i_size
+ */
ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
path, inode->i_ino, -1, 0);
if (ret < 0) {
@@ -2953,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
found_type = btrfs_key_type(&found_key);
- /* No extents, just return */
+ /* No extents, but there might be delalloc bits */
if (found_key.objectid != inode->i_ino ||
found_type != BTRFS_EXTENT_DATA_KEY) {
- btrfs_free_path(path);
- return 0;
+ /* have to trust i_size as the end */
+ last = (u64)-1;
+ last_for_get_extent = isize;
+ } else {
+ /*
+ * remember the start of the last extent. There are a
+ * bunch of different factors that go into the length of the
+ * extent, so its much less complex to remember where it started
+ */
+ last = found_key.offset;
+ last_for_get_extent = last + 1;
}
- last = found_key.offset;
btrfs_free_path(path);
+ /*
+ * we might have some extents allocated but more delalloc past those
+ * extents. so, we trust isize unless the start of the last extent is
+ * beyond isize
+ */
+ if (last < isize) {
+ last = (u64)-1;
+ last_for_get_extent = isize;
+ }
+
lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
&cached_state, GFP_NOFS);
- em = get_extent(inode, NULL, 0, off, max - off, 0);
+
+ em = get_extent_skip_holes(inode, off, last_for_get_extent,
+ get_extent);
if (!em)
goto out;
if (IS_ERR(em)) {
@@ -2973,22 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
while (!end) {
- hole = 0;
- off = em->start + em->len;
- if (off >= max)
- end = 1;
+ u64 offset_in_extent;
- if (em->block_start == EXTENT_MAP_HOLE) {
- hole = 1;
- goto next;
- }
+ /* break if the extent we found is outside the range */
+ if (em->start >= max || extent_map_end(em) < off)
+ break;
- em_start = em->start;
- em_len = em->len;
+ /*
+ * get_extent may return an extent that starts before our
+ * requested range. We have to make sure the ranges
+ * we return to fiemap always move forward and don't
+ * overlap, so adjust the offsets here
+ */
+ em_start = max(em->start, off);
+ /*
+ * record the offset from the start of the extent
+ * for adjusting the disk offset below
+ */
+ offset_in_extent = em_start - em->start;
+ em_end = extent_map_end(em);
+ em_len = em_end - em_start;
+ emflags = em->flags;
disko = 0;
flags = 0;
+ /*
+ * bump off for our next call to get_extent
+ */
+ off = extent_map_end(em);
+ if (off >= max)
+ end = 1;
+
if (em->block_start == EXTENT_MAP_LAST_BYTE) {
end = 1;
flags |= FIEMAP_EXTENT_LAST;
@@ -2999,42 +3088,34 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN);
} else {
- disko = em->block_start;
+ disko = em->block_start + offset_in_extent;
}
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
-next:
- emflags = em->flags;
free_extent_map(em);
em = NULL;
- if (!end) {
- em = get_extent(inode, NULL, 0, off, max - off, 0);
- if (!em)
- goto out;
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto out;
- }
- emflags = em->flags;
- }
-
- if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
+ if ((em_start >= last) || em_len == (u64)-1 ||
+ (last == (u64)-1 && isize <= em_end)) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
- if (em_start == last) {
+ /* now scan forward to see if this is really the last extent. */
+ em = get_extent_skip_holes(inode, off, last_for_get_extent,
+ get_extent);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+ if (!em) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
-
- if (!hole) {
- ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
- em_len, flags);
- if (ret)
- goto out_free;
- }
+ ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+ em_len, flags);
+ if (ret)
+ goto out_free;
}
out_free:
free_extent_map(em);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd06..9318dfefd59 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
- u64 max_bytes, unsigned long bits);
+ u64 max_bytes, unsigned long bits, int contig);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7084140d594..f447b783bb8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
/* Flush processor's dcache for this page */
flush_dcache_page(page);
+
+ /*
+ * if we get a partial write, we can end up with
+ * partially up to date pages. These add
+ * a lot of complexity, so make sure they don't
+ * happen by forcing this copy to be retried.
+ *
+ * The rest of the btrfs_file_write code will fall
+ * back to page at a time copies after we return 0.
+ */
+ if (!PageUptodate(page) && copied < count)
+ copied = 0;
+
iov_iter_advance(i, copied);
write_bytes -= copied;
total_copied += copied;
@@ -763,6 +776,27 @@ out:
}
/*
+ * on error we return an unlocked page and the error value
+ * on success we return a locked page and 0
+ */
+static int prepare_uptodate_page(struct page *page, u64 pos)
+{
+ int ret = 0;
+
+ if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
+ ret = btrfs_readpage(NULL, page);
+ if (ret)
+ return ret;
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ return -EIO;
+ }
+ }
+ return 0;
+}
+
+/*
* this gets pages into the page cache and locks them down, it also properly
* waits for data=ordered extents to finish before allowing the pages to be
* modified.
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
unsigned long index = pos >> PAGE_CACHE_SHIFT;
struct inode *inode = fdentry(file)->d_inode;
int err = 0;
+ int faili = 0;
u64 start_pos;
u64 last_pos;
@@ -794,15 +829,24 @@ again:
for (i = 0; i < num_pages; i++) {
pages[i] = grab_cache_page(inode->i_mapping, index + i);
if (!pages[i]) {
- int c;
- for (c = i - 1; c >= 0; c--) {
- unlock_page(pages[c]);
- page_cache_release(pages[c]);
- }
- return -ENOMEM;
+ faili = i - 1;
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ if (i == 0)
+ err = prepare_uptodate_page(pages[i], pos);
+ if (i == num_pages - 1)
+ err = prepare_uptodate_page(pages[i],
+ pos + write_bytes);
+ if (err) {
+ page_cache_release(pages[i]);
+ faili = i - 1;
+ goto fail;
}
wait_on_page_writeback(pages[i]);
}
+ err = 0;
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -842,6 +886,14 @@ again:
WARN_ON(!PageLocked(pages[i]));
}
return 0;
+fail:
+ while (faili >= 0) {
+ unlock_page(pages[faili]);
+ page_cache_release(pages[faili]);
+ faili--;
+ }
+ return err;
+
}
static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct page *pinned[2];
struct page **pages = NULL;
struct iov_iter i;
loff_t *ppos = &iocb->ki_pos;
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
- pinned[0] = NULL;
- pinned[1] = NULL;
-
start_pos = pos;
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
first_index = pos >> PAGE_CACHE_SHIFT;
last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
- /*
- * there are lots of better ways to do this, but this code
- * makes sure the first and last page in the file range are
- * up to date and ready for cow
- */
- if ((pos & (PAGE_CACHE_SIZE - 1))) {
- pinned[0] = grab_cache_page(inode->i_mapping, first_index);
- if (!PageUptodate(pinned[0])) {
- ret = btrfs_readpage(NULL, pinned[0]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[0]);
- } else {
- unlock_page(pinned[0]);
- }
- }
- if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
- pinned[1] = grab_cache_page(inode->i_mapping, last_index);
- if (!PageUptodate(pinned[1])) {
- ret = btrfs_readpage(NULL, pinned[1]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[1]);
- } else {
- unlock_page(pinned[1]);
- }
- }
-
while (iov_iter_count(&i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
size_t write_bytes = min(iov_iter_count(&i),
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, &i);
- dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+
+ /*
+ * if we have trouble faulting in the pages, fall
+ * back to one page at a time
+ */
+ if (copied < write_bytes)
+ nrptrs = 1;
+
+ if (copied == 0)
+ dirty_pages = 0;
+ else
+ dirty_pages = (copied + offset +
+ PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
if (num_pages > dirty_pages) {
if (copied > 0)
@@ -1069,10 +1103,6 @@ out:
err = ret;
kfree(pages);
- if (pinned[0])
- page_cache_release(pinned[0]);
- if (pinned[1])
- page_cache_release(pinned[1]);
*ppos = pos;
/*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fb9bd7832b6..512c3d1da08 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -90,13 +90,14 @@ static noinline int cow_file_range(struct inode *inode,
unsigned long *nr_written, int unlock);
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir)
+ struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int err;
err = btrfs_init_acl(trans, inode, dir);
if (!err)
- err = btrfs_xattr_security_init(trans, inode, dir);
+ err = btrfs_xattr_security_init(trans, inode, dir, qstr);
return err;
}
@@ -1913,7 +1914,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
private = 0;
if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
- (u64)-1, 1, EXTENT_DIRTY)) {
+ (u64)-1, 1, EXTENT_DIRTY, 0)) {
ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
start, &private_failure);
if (ret == 0) {
@@ -4704,7 +4705,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(trans, inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err) {
drop_inode = 1;
goto out_unlock;
@@ -4765,7 +4766,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(trans, inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err) {
drop_inode = 1;
goto out_unlock;
@@ -4806,9 +4807,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
int err;
int drop_inode = 0;
- if (inode->i_nlink == 0)
- return -ENOENT;
-
/* do not allow sys_link's with other subvols of the same device */
if (root->objectid != BTRFS_I(inode)->root->objectid)
return -EPERM;
@@ -4821,10 +4819,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
/*
- * 1 item for inode ref
+ * 2 items for inode and inode ref
* 2 items for dir items
+ * 1 item for parent inode
*/
- trans = btrfs_start_transaction(root, 3);
+ trans = btrfs_start_transaction(root, 5);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto fail;
@@ -4893,7 +4892,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
drop_on_err = 1;
- err = btrfs_init_inode_security(trans, inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
goto out_fail;
@@ -5280,6 +5279,128 @@ out:
return em;
}
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+ size_t pg_offset, u64 start, u64 len,
+ int create)
+{
+ struct extent_map *em;
+ struct extent_map *hole_em = NULL;
+ u64 range_start = start;
+ u64 end;
+ u64 found;
+ u64 found_end;
+ int err = 0;
+
+ em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
+ if (IS_ERR(em))
+ return em;
+ if (em) {
+ /*
+ * if our em maps to a hole, there might
+ * actually be delalloc bytes behind it
+ */
+ if (em->block_start != EXTENT_MAP_HOLE)
+ return em;
+ else
+ hole_em = em;
+ }
+
+ /* check to see if we've wrapped (len == -1 or similar) */
+ end = start + len;
+ if (end < start)
+ end = (u64)-1;
+ else
+ end -= 1;
+
+ em = NULL;
+
+ /* ok, we didn't find anything, lets look for delalloc */
+ found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
+ end, len, EXTENT_DELALLOC, 1);
+ found_end = range_start + found;
+ if (found_end < range_start)
+ found_end = (u64)-1;
+
+ /*
+ * we didn't find anything useful, return
+ * the original results from get_extent()
+ */
+ if (range_start > end || found_end <= start) {
+ em = hole_em;
+ hole_em = NULL;
+ goto out;
+ }
+
+ /* adjust the range_start to make sure it doesn't
+ * go backwards from the start they passed in
+ */
+ range_start = max(start,range_start);
+ found = found_end - range_start;
+
+ if (found > 0) {
+ u64 hole_start = start;
+ u64 hole_len = len;
+
+ em = alloc_extent_map(GFP_NOFS);
+ if (!em) {
+ err = -ENOMEM;
+ goto out;
+ }
+ /*
+ * when btrfs_get_extent can't find anything it
+ * returns one huge hole
+ *
+ * make sure what it found really fits our range, and
+ * adjust to make sure it is based on the start from
+ * the caller
+ */
+ if (hole_em) {
+ u64 calc_end = extent_map_end(hole_em);
+
+ if (calc_end <= start || (hole_em->start > end)) {
+ free_extent_map(hole_em);
+ hole_em = NULL;
+ } else {
+ hole_start = max(hole_em->start, start);
+ hole_len = calc_end - hole_start;
+ }
+ }
+ em->bdev = NULL;
+ if (hole_em && range_start > hole_start) {
+ /* our hole starts before our delalloc, so we
+ * have to return just the parts of the hole
+ * that go until the delalloc starts
+ */
+ em->len = min(hole_len,
+ range_start - hole_start);
+ em->start = hole_start;
+ em->orig_start = hole_start;
+ /*
+ * don't adjust block start at all,
+ * it is fixed at EXTENT_MAP_HOLE
+ */
+ em->block_start = hole_em->block_start;
+ em->block_len = hole_len;
+ } else {
+ em->start = range_start;
+ em->len = found;
+ em->orig_start = range_start;
+ em->block_start = EXTENT_MAP_DELALLOC;
+ em->block_len = found;
+ }
+ } else if (hole_em) {
+ return hole_em;
+ }
+out:
+
+ free_extent_map(hole_em);
+ if (err) {
+ free_extent_map(em);
+ return ERR_PTR(err);
+ }
+ return em;
+}
+
static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
u64 start, u64 len)
{
@@ -5934,6 +6055,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
if (!skip_sum) {
dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
if (!dip->csums) {
+ kfree(dip);
ret = -ENOMEM;
goto free_ordered;
}
@@ -6102,7 +6224,7 @@ out:
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
- return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent);
+ return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
}
int btrfs_readpage(struct file *file, struct page *page)
@@ -6982,7 +7104,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(trans, inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err) {
drop_inode = 1;
goto out_unlock;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index be2d4f6aaa5..5fdb2abc4fa 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1071,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
if (copy_from_user(&flags, arg, sizeof(flags)))
return -EFAULT;
- if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
+ if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
return -EINVAL;
if (flags & ~BTRFS_SUBVOL_RDONLY)
return -EOPNOTSUPP;
+ if (!is_owner_or_cap(inode))
+ return -EACCES;
+
down_write(&root->fs_info->subvol_sem);
/* nothing to do */
@@ -1097,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
goto out_reset;
}
- ret = btrfs_update_root(trans, root,
+ ret = btrfs_update_root(trans, root->fs_info->tree_root,
&root->root_key, &root->root_item);
btrfs_commit_transaction(trans, root);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399d..a178f5ebea7 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
unsigned long tot_out;
unsigned long tot_len;
char *buf;
+ bool may_late_unmap, need_unmap;
data_in = kmap(pages_in[0]);
tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
tot_in += in_len;
working_bytes = in_len;
+ may_late_unmap = need_unmap = false;
/* fast path: avoid using the working buffer */
if (in_page_bytes_left >= in_len) {
buf = data_in + in_offset;
bytes = in_len;
+ may_late_unmap = true;
goto cont;
}
@@ -329,14 +332,17 @@ cont:
if (working_bytes == 0 && tot_in >= tot_len)
break;
- kunmap(pages_in[page_in_index]);
- page_in_index++;
- if (page_in_index >= total_pages_in) {
+ if (page_in_index + 1 >= total_pages_in) {
ret = -1;
- data_in = NULL;
goto done;
}
- data_in = kmap(pages_in[page_in_index]);
+
+ if (may_late_unmap)
+ need_unmap = true;
+ else
+ kunmap(pages_in[page_in_index]);
+
+ data_in = kmap(pages_in[++page_in_index]);
in_page_bytes_left = PAGE_CACHE_SIZE;
in_offset = 0;
@@ -346,6 +352,8 @@ cont:
out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
&out_len);
+ if (need_unmap)
+ kunmap(pages_in[page_in_index - 1]);
if (ret != LZO_E_OK) {
printk(KERN_WARNING "btrfs decompress failed\n");
ret = -1;
@@ -363,8 +371,7 @@ cont:
break;
}
done:
- if (data_in)
- kunmap(pages_in[page_in_index]);
+ kunmap(pages_in[page_in_index]);
return ret;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0825e4ed944..31ade5802ae 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3654,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
u32 item_size;
int ret;
int err = 0;
+ int progress = 0;
path = btrfs_alloc_path();
if (!path)
@@ -3666,9 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
while (1) {
+ progress++;
trans = btrfs_start_transaction(rc->extent_root, 0);
BUG_ON(IS_ERR(trans));
-
+restart:
if (update_backref_cache(trans, &rc->backref_cache)) {
btrfs_end_transaction(trans, rc->extent_root);
continue;
@@ -3781,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
}
}
+ if (trans && progress && err == -ENOSPC) {
+ ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
+ rc->block_group->flags);
+ if (ret == 0) {
+ err = 0;
+ progress = 0;
+ goto restart;
+ }
+ }
btrfs_release_path(rc->extent_root, path);
clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a004008f7d2..d39a9895d93 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
- Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
+ Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
+ Opt_enospc_debug, Opt_err,
};
static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
{Opt_space_cache, "space_cache"},
{Opt_clear_cache, "clear_cache"},
{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
+ {Opt_enospc_debug, "enospc_debug"},
{Opt_err, NULL},
};
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_user_subvol_rm_allowed:
btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
break;
+ case Opt_enospc_debug:
+ btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
+ break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index af7dbca1527..dd13eb81ee4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1338,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
ret = btrfs_shrink_device(device, 0);
if (ret)
- goto error_brelse;
+ goto error_undo;
ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
if (ret)
- goto error_brelse;
+ goto error_undo;
device->in_fs_metadata = 0;
@@ -1416,6 +1416,13 @@ out:
mutex_unlock(&root->fs_info->volume_mutex);
mutex_unlock(&uuid_mutex);
return ret;
+error_undo:
+ if (device->writeable) {
+ list_add(&device->dev_alloc_list,
+ &root->fs_info->fs_devices->alloc_list);
+ root->fs_info->fs_devices->rw_devices++;
+ }
+ goto error_brelse;
}
/*
@@ -1633,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->dev_root = root->fs_info->dev_root;
device->bdev = bdev;
device->in_fs_metadata = 1;
- device->mode = 0;
+ device->mode = FMODE_EXCL;
set_blocksize(device->bdev, 4096);
if (seeding_dev) {
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5776531dc2..d779cefcfd7 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -370,7 +370,8 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
}
int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir)
+ struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int err;
size_t len;
@@ -378,7 +379,8 @@ int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
char *suffix;
char *name;
- err = security_inode_init_security(inode, dir, &suffix, &value, &len);
+ err = security_inode_init_security(inode, dir, qstr, &suffix, &value,
+ &len);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 7a43fd640bb..b3cc8039134 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -37,6 +37,7 @@ extern int btrfs_setxattr(struct dentry *dentry, const char *name,
extern int btrfs_removexattr(struct dentry *dentry, const char *name);
extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir);
+ struct inode *inode, struct inode *dir,
+ const struct qstr *qstr);
#endif /* __XATTR__ */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 42c7fafc8bf..a0358c2189c 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -275,6 +275,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
bool preemptive)
{
struct dentry *grave, *trap;
+ struct path path, path_to_graveyard;
char nbuffer[8 + 8 + 1];
int ret;
@@ -287,10 +288,18 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
/* non-directories can just be unlinked */
if (!S_ISDIR(rep->d_inode->i_mode)) {
_debug("unlink stale object");
- ret = vfs_unlink(dir->d_inode, rep);
- if (preemptive)
- cachefiles_mark_object_buried(cache, rep);
+ path.mnt = cache->mnt;
+ path.dentry = dir;
+ ret = security_path_unlink(&path, rep);
+ if (ret < 0) {
+ cachefiles_io_error(cache, "Unlink security error");
+ } else {
+ ret = vfs_unlink(dir->d_inode, rep);
+
+ if (preemptive)
+ cachefiles_mark_object_buried(cache, rep);
+ }
mutex_unlock(&dir->d_inode->i_mutex);
@@ -379,12 +388,23 @@ try_again:
}
/* attempt the rename */
- ret = vfs_rename(dir->d_inode, rep, cache->graveyard->d_inode, grave);
- if (ret != 0 && ret != -ENOMEM)
- cachefiles_io_error(cache, "Rename failed with error %d", ret);
+ path.mnt = cache->mnt;
+ path.dentry = dir;
+ path_to_graveyard.mnt = cache->mnt;
+ path_to_graveyard.dentry = cache->graveyard;
+ ret = security_path_rename(&path, rep, &path_to_graveyard, grave);
+ if (ret < 0) {
+ cachefiles_io_error(cache, "Rename security error %d", ret);
+ } else {
+ ret = vfs_rename(dir->d_inode, rep,
+ cache->graveyard->d_inode, grave);
+ if (ret != 0 && ret != -ENOMEM)
+ cachefiles_io_error(cache,
+ "Rename failed with error %d", ret);
- if (preemptive)
- cachefiles_mark_object_buried(cache, rep);
+ if (preemptive)
+ cachefiles_mark_object_buried(cache, rep);
+ }
unlock_rename(cache->graveyard, dir);
dput(grave);
@@ -448,6 +468,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
{
struct cachefiles_cache *cache;
struct dentry *dir, *next = NULL;
+ struct path path;
unsigned long start;
const char *name;
int ret, nlen;
@@ -458,6 +479,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
cache = container_of(parent->fscache.cache,
struct cachefiles_cache, cache);
+ path.mnt = cache->mnt;
ASSERT(parent->dentry);
ASSERT(parent->dentry->d_inode);
@@ -511,6 +533,10 @@ lookup_again:
if (ret < 0)
goto create_error;
+ path.dentry = dir;
+ ret = security_path_mkdir(&path, next, 0);
+ if (ret < 0)
+ goto create_error;
start = jiffies;
ret = vfs_mkdir(dir->d_inode, next, 0);
cachefiles_hist(cachefiles_mkdir_histogram, start);
@@ -536,6 +562,10 @@ lookup_again:
if (ret < 0)
goto create_error;
+ path.dentry = dir;
+ ret = security_path_mknod(&path, next, S_IFREG, 0);
+ if (ret < 0)
+ goto create_error;
start = jiffies;
ret = vfs_create(dir->d_inode, next, S_IFREG, NULL);
cachefiles_hist(cachefiles_create_histogram, start);
@@ -692,6 +722,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
{
struct dentry *subdir;
unsigned long start;
+ struct path path;
int ret;
_enter(",,%s", dirname);
@@ -719,6 +750,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
_debug("attempt mkdir");
+ path.mnt = cache->mnt;
+ path.dentry = dir;
+ ret = security_path_mkdir(&path, subdir, 0700);
+ if (ret < 0)
+ goto mkdir_error;
ret = vfs_mkdir(dir->d_inode, subdir, 0700);
if (ret < 0)
goto mkdir_error;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index f0aef787a10..ebafa65a29b 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -60,7 +60,6 @@ int ceph_init_dentry(struct dentry *dentry)
}
di->dentry = dentry;
di->lease_session = NULL;
- di->parent_inode = igrab(dentry->d_parent->d_inode);
dentry->d_fsdata = di;
dentry->d_time = jiffies;
ceph_dentry_lru_add(dentry);
@@ -410,7 +409,7 @@ more:
spin_lock(&inode->i_lock);
if (ci->i_release_count == fi->dir_release_count) {
dout(" marking %p complete\n", inode);
- ci->i_ceph_flags |= CEPH_I_COMPLETE;
+ /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
ci->i_max_offset = filp->f_pos;
}
spin_unlock(&inode->i_lock);
@@ -497,6 +496,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
/* .snap dir? */
if (err == -ENOENT &&
+ ceph_snap(parent) == CEPH_NOSNAP &&
strcmp(dentry->d_name.name,
fsc->mount_options->snapdir_name) == 0) {
struct inode *inode = ceph_get_snapdir(parent);
@@ -993,7 +993,7 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *dir;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
dir = dentry->d_parent->d_inode;
@@ -1030,28 +1030,8 @@ out_touch:
static void ceph_dentry_release(struct dentry *dentry)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
- struct inode *parent_inode = NULL;
- u64 snapid = CEPH_NOSNAP;
- if (!IS_ROOT(dentry)) {
- parent_inode = di->parent_inode;
- if (parent_inode)
- snapid = ceph_snap(parent_inode);
- }
- dout("dentry_release %p parent %p\n", dentry, parent_inode);
- if (parent_inode && snapid != CEPH_SNAPDIR) {
- struct ceph_inode_info *ci = ceph_inode(parent_inode);
-
- spin_lock(&parent_inode->i_lock);
- if (ci->i_shared_gen == di->lease_shared_gen ||
- snapid <= CEPH_MAXSNAP) {
- dout(" clearing %p complete (d_release)\n",
- parent_inode);
- ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
- ci->i_release_count++;
- }
- spin_unlock(&parent_inode->i_lock);
- }
+ dout("dentry_release %p\n", dentry);
if (di) {
ceph_dentry_lru_del(dentry);
if (di->lease_session)
@@ -1059,8 +1039,6 @@ static void ceph_dentry_release(struct dentry *dentry)
kmem_cache_free(ceph_dentry_cachep, di);
dentry->d_fsdata = NULL;
}
- if (parent_inode)
- iput(parent_inode);
}
static int ceph_snapdir_d_revalidate(struct dentry *dentry,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5625463aa47..193bfa5e9cb 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -707,7 +707,7 @@ static int fill_inode(struct inode *inode,
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
dout(" marking %p complete (empty)\n", inode);
- ci->i_ceph_flags |= CEPH_I_COMPLETE;
+ /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
ci->i_max_offset = 2;
}
break;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 88fcaa21b80..20b907d76ae 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -207,7 +207,6 @@ struct ceph_dentry_info {
struct dentry *dentry;
u64 time;
u64 offset;
- struct inode *parent_inode;
};
struct ceph_inode_xattrs_info {
diff --git a/fs/compat.c b/fs/compat.c
index f6fd0a00e6c..c6d31a3bab8 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -262,35 +262,19 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
*/
asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
{
- struct path path;
- int error;
-
- error = user_path(pathname, &path);
- if (!error) {
- struct kstatfs tmp;
- error = vfs_statfs(&path, &tmp);
- if (!error)
- error = put_compat_statfs(buf, &tmp);
- path_put(&path);
- }
+ struct kstatfs tmp;
+ int error = user_statfs(pathname, &tmp);
+ if (!error)
+ error = put_compat_statfs(buf, &tmp);
return error;
}
asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf)
{
- struct file * file;
struct kstatfs tmp;
- int error;
-
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = vfs_statfs(&file->f_path, &tmp);
+ int error = fd_statfs(fd, &tmp);
if (!error)
error = put_compat_statfs(buf, &tmp);
- fput(file);
-out:
return error;
}
@@ -329,41 +313,29 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
{
- struct path path;
+ struct kstatfs tmp;
int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = user_path(pathname, &path);
- if (!error) {
- struct kstatfs tmp;
- error = vfs_statfs(&path, &tmp);
- if (!error)
- error = put_compat_statfs64(buf, &tmp);
- path_put(&path);
- }
+ error = user_statfs(pathname, &tmp);
+ if (!error)
+ error = put_compat_statfs64(buf, &tmp);
return error;
}
asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf)
{
- struct file * file;
struct kstatfs tmp;
int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = vfs_statfs(&file->f_path, &tmp);
+ error = fd_statfs(fd, &tmp);
if (!error)
error = put_compat_statfs64(buf, &tmp);
- fput(file);
-out:
return error;
}
@@ -1228,7 +1200,9 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_readv(file, vec, vlen, &pos);
+ ret = -ESPIPE;
+ if (file->f_mode & FMODE_PREAD)
+ ret = compat_readv(file, vec, vlen, &pos);
fput_light(file, fput_needed);
return ret;
}
@@ -1285,7 +1259,9 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_writev(file, vec, vlen, &pos);
+ ret = -ESPIPE;
+ if (file->f_mode & FMODE_PWRITE)
+ ret = compat_writev(file, vec, vlen, &pos);
fput_light(file, fput_needed);
return ret;
}
@@ -2308,3 +2284,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
}
#endif /* CONFIG_TIMERFD */
+
+#ifdef CONFIG_FHANDLE
+/*
+ * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
+ * doesn't set the O_LARGEFILE flag.
+ */
+asmlinkage long
+compat_sys_open_by_handle_at(int mountdirfd,
+ struct file_handle __user *handle, int flags)
+{
+ return do_handle_open(mountdirfd, handle, flags);
+}
+#endif
diff --git a/fs/dcache.c b/fs/dcache.c
index 2a6bd9a4ae9..a39fe47c466 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
__releases(parent->d_lock)
__releases(dentry->d_inode->i_lock)
{
- dentry->d_parent = NULL;
list_del(&dentry->d_u.d_child);
+ /*
+ * Inform try_to_ascend() that we are no longer attached to the
+ * dentry tree
+ */
+ dentry->d_flags |= DCACHE_DISCONNECTED;
if (parent)
spin_unlock(&parent->d_lock);
dentry_iput(dentry);
@@ -1012,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb)
}
/*
+ * This tries to ascend one level of parenthood, but
+ * we can race with renaming, so we need to re-check
+ * the parenthood after dropping the lock and check
+ * that the sequence number still matches.
+ */
+static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
+{
+ struct dentry *new = old->d_parent;
+
+ rcu_read_lock();
+ spin_unlock(&old->d_lock);
+ spin_lock(&new->d_lock);
+
+ /*
+ * might go back up the wrong parent if we have had a rename
+ * or deletion
+ */
+ if (new != old->d_parent ||
+ (old->d_flags & DCACHE_DISCONNECTED) ||
+ (!locked && read_seqretry(&rename_lock, seq))) {
+ spin_unlock(&new->d_lock);
+ new = NULL;
+ }
+ rcu_read_unlock();
+ return new;
+}
+
+
+/*
* Search for at least 1 mount point in the dentry's subdirs.
* We descend to the next level whenever the d_subdirs
* list is non-empty and continue searching.
@@ -1066,24 +1099,10 @@ resume:
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ struct dentry *child = this_parent;
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
@@ -1181,24 +1200,10 @@ resume:
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ struct dentry *child = this_parent;
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
@@ -1523,6 +1528,28 @@ struct dentry * d_alloc_root(struct inode * root_inode)
}
EXPORT_SYMBOL(d_alloc_root);
+static struct dentry * __d_find_any_alias(struct inode *inode)
+{
+ struct dentry *alias;
+
+ if (list_empty(&inode->i_dentry))
+ return NULL;
+ alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
+ __dget(alias);
+ return alias;
+}
+
+static struct dentry * d_find_any_alias(struct inode *inode)
+{
+ struct dentry *de;
+
+ spin_lock(&inode->i_lock);
+ de = __d_find_any_alias(inode);
+ spin_unlock(&inode->i_lock);
+ return de;
+}
+
+
/**
* d_obtain_alias - find or allocate a dentry for a given inode
* @inode: inode to allocate the dentry for
@@ -1552,7 +1579,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
if (IS_ERR(inode))
return ERR_CAST(inode);
- res = d_find_alias(inode);
+ res = d_find_any_alias(inode);
if (res)
goto out_iput;
@@ -1565,7 +1592,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
spin_lock(&inode->i_lock);
- res = __d_find_alias(inode, 0);
+ res = __d_find_any_alias(inode);
if (res) {
spin_unlock(&inode->i_lock);
dput(tmp);
@@ -2920,28 +2947,14 @@ resume:
spin_unlock(&dentry->d_lock);
}
if (this_parent != root) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
+ struct dentry *child = this_parent;
if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
this_parent->d_flags |= DCACHE_GENOCIDE;
this_parent->d_count--;
}
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 37a8ca7c122..e7a7a2f0732 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -13,9 +13,6 @@
*
*/
-/* uncomment to get debug messages from the debug filesystem, ah the irony. */
-/* #define DEBUG */
-
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mount.h>
@@ -310,7 +307,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
}
EXPORT_SYMBOL_GPL(debugfs_create_symlink);
-static void __debugfs_remove(struct dentry *dentry, struct dentry *parent)
+static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
{
int ret = 0;
@@ -333,6 +330,7 @@ static void __debugfs_remove(struct dentry *dentry, struct dentry *parent)
dput(dentry);
}
}
+ return ret;
}
/**
@@ -351,7 +349,8 @@ static void __debugfs_remove(struct dentry *dentry, struct dentry *parent)
void debugfs_remove(struct dentry *dentry)
{
struct dentry *parent;
-
+ int ret;
+
if (!dentry)
return;
@@ -360,9 +359,10 @@ void debugfs_remove(struct dentry *dentry)
return;
mutex_lock(&parent->d_inode->i_mutex);
- __debugfs_remove(dentry, parent);
+ ret = __debugfs_remove(dentry, parent);
mutex_unlock(&parent->d_inode->i_mutex);
- simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+ if (!ret)
+ simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
EXPORT_SYMBOL_GPL(debugfs_remove);
@@ -540,17 +540,5 @@ static int __init debugfs_init(void)
return retval;
}
-
-static void __exit debugfs_exit(void)
-{
- debugfs_registered = false;
-
- simple_release_fs(&debugfs_mount, &debugfs_mount_count);
- unregister_filesystem(&debug_fs_type);
- kobject_put(debug_kobj);
-}
-
core_initcall(debugfs_init);
-module_exit(debugfs_exit);
-MODULE_LICENSE("GPL");
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 267d0ada454..4a09af9e9a6 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
* cleanup path and it is also acquired by eventpoll_release_file()
* if a file has been pushed inside an epoll set and it is then
* close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
+ * It is also acquired when inserting an epoll fd onto another epoll
+ * fd. We do this so that we walk the epoll tree and ensure that this
+ * insertion does not create a cycle of epoll file descriptors, which
+ * could lead to deadlock. We need a global mutex to prevent two
+ * simultaneous inserts (A into B and B into A) from racing and
+ * constructing a cycle without either insert observing that it is
+ * going to.
* It is possible to drop the "ep->mtx" and to use the global
* mutex "epmutex" (together with "ep->lock") to have it working,
* but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
*/
static DEFINE_MUTEX(epmutex);
+/* Used to check for epoll file descriptor inclusion loops */
+static struct nested_calls poll_loop_ncalls;
+
/* Used for safe wake up implementation */
static struct nested_calls poll_safewake_ncalls;
@@ -1198,6 +1208,62 @@ retry:
return res;
}
+/**
+ * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
+ * API, to verify that adding an epoll file inside another
+ * epoll structure, does not violate the constraints, in
+ * terms of closed loops, or too deep chains (which can
+ * result in excessive stack usage).
+ *
+ * @priv: Pointer to the epoll file to be currently checked.
+ * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
+ * data structure pointer.
+ * @call_nests: Current dept of the @ep_call_nested() call stack.
+ *
+ * Returns: Returns zero if adding the epoll @file inside current epoll
+ * structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
+{
+ int error = 0;
+ struct file *file = priv;
+ struct eventpoll *ep = file->private_data;
+ struct rb_node *rbp;
+ struct epitem *epi;
+
+ mutex_lock(&ep->mtx);
+ for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ epi = rb_entry(rbp, struct epitem, rbn);
+ if (unlikely(is_file_epoll(epi->ffd.file))) {
+ error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ ep_loop_check_proc, epi->ffd.file,
+ epi->ffd.file->private_data, current);
+ if (error != 0)
+ break;
+ }
+ }
+ mutex_unlock(&ep->mtx);
+
+ return error;
+}
+
+/**
+ * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
+ * another epoll file (represented by @ep) does not create
+ * closed loops or too deep chains.
+ *
+ * @ep: Pointer to the epoll private data structure.
+ * @file: Pointer to the epoll file to be checked.
+ *
+ * Returns: Returns zero if adding the epoll @file inside current epoll
+ * structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check(struct eventpoll *ep, struct file *file)
+{
+ return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ ep_loop_check_proc, file, ep, current);
+}
+
/*
* Open an eventpoll file descriptor.
*/
@@ -1246,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
struct epoll_event __user *, event)
{
int error;
+ int did_lock_epmutex = 0;
struct file *file, *tfile;
struct eventpoll *ep;
struct epitem *epi;
@@ -1287,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
*/
ep = file->private_data;
+ /*
+ * When we insert an epoll file descriptor, inside another epoll file
+ * descriptor, there is the change of creating closed loops, which are
+ * better be handled here, than in more critical paths.
+ *
+ * We hold epmutex across the loop check and the insert in this case, in
+ * order to prevent two separate inserts from racing and each doing the
+ * insert "at the same time" such that ep_loop_check passes on both
+ * before either one does the insert, thereby creating a cycle.
+ */
+ if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+ mutex_lock(&epmutex);
+ did_lock_epmutex = 1;
+ error = -ELOOP;
+ if (ep_loop_check(ep, tfile) != 0)
+ goto error_tgt_fput;
+ }
+
+
mutex_lock(&ep->mtx);
/*
@@ -1322,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
mutex_unlock(&ep->mtx);
error_tgt_fput:
+ if (unlikely(did_lock_epmutex))
+ mutex_unlock(&epmutex);
+
fput(tfile);
error_fput:
fput(file);
@@ -1441,6 +1530,12 @@ static int __init eventpoll_init(void)
EP_ITEM_COST;
BUG_ON(max_user_watches < 0);
+ /*
+ * Initialize the structure used to perform epoll file descriptor
+ * inclusion loops checks.
+ */
+ ep_nested_calls_init(&poll_loop_ncalls);
+
/* Initialize the structure used to perform safe poll wait head wake ups */
ep_nested_calls_init(&poll_safewake_ncalls);
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6a..ba99e1abb1a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
struct file *file;
char *tmp = getname(library);
int error = PTR_ERR(tmp);
+ static const struct open_flags uselib_flags = {
+ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
+ .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
+ .intent = LOOKUP_OPEN
+ };
if (IS_ERR(tmp))
goto out;
- file = do_filp_open(AT_FDCWD, tmp,
- O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
- MAY_READ | MAY_EXEC | MAY_OPEN);
+ file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
putname(tmp);
error = PTR_ERR(file);
if (IS_ERR(file))
@@ -721,10 +724,13 @@ struct file *open_exec(const char *name)
{
struct file *file;
int err;
+ static const struct open_flags open_exec_flags = {
+ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
+ .acc_mode = MAY_EXEC | MAY_OPEN,
+ .intent = LOOKUP_OPEN
+ };
- file = do_filp_open(AT_FDCWD, name,
- O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
- MAY_EXEC | MAY_OPEN);
+ file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
if (IS_ERR(file))
goto out;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 264e95d0283..4d70db110cf 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -272,7 +272,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
err = exofs_set_link(new_dir, new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME;
if (dir_de)
@@ -286,12 +285,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= EXOFS_LINK_MAX)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = exofs_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
@@ -299,7 +295,7 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode->i_ctime = CURRENT_TIME;
exofs_delete_entry(old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 4b6825740dd..b05acb79613 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -320,9 +320,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
struct inode * inode = dentry->d_inode;
int len = *max_len;
int type = FILEID_INO32_GEN;
-
- if (len < 2 || (connectable && len < 4))
+
+ if (connectable && (len < 4)) {
+ *max_len = 4;
+ return 255;
+ } else if (len < 2) {
+ *max_len = 2;
return 255;
+ }
len = 2;
fid->i32.ino = inode->i_ino;
@@ -369,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
/*
* Try to get any dentry for the given file handle from the filesystem.
*/
+ if (!nop || !nop->fh_to_dentry)
+ return ERR_PTR(-ESTALE);
result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
if (!result)
result = ERR_PTR(-ESTALE);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 6346a2acf32..1b48c337087 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -110,7 +110,7 @@ extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
/* ialloc.c */
-extern struct inode * ext2_new_inode (struct inode *, int);
+extern struct inode * ext2_new_inode (struct inode *, int, const struct qstr *);
extern void ext2_free_inode (struct inode *);
extern unsigned long ext2_count_free_inodes (struct super_block *);
extern void ext2_check_inodes_bitmap (struct super_block *);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index ad70479aabf..ee9ed31948e 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -429,7 +429,8 @@ found:
return group;
}
-struct inode *ext2_new_inode(struct inode *dir, int mode)
+struct inode *ext2_new_inode(struct inode *dir, int mode,
+ const struct qstr *qstr)
{
struct super_block *sb;
struct buffer_head *bitmap_bh = NULL;
@@ -585,7 +586,7 @@ got:
if (err)
goto fail_free_drop;
- err = ext2_init_security(inode,dir);
+ err = ext2_init_security(inode, dir, qstr);
if (err)
goto fail_free_drop;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2e1d8341d82..ed5c5d496ee 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -104,7 +104,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
dquot_initialize(dir);
- inode = ext2_new_inode(dir, mode);
+ inode = ext2_new_inode(dir, mode, &dentry->d_name);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -133,7 +133,7 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_
dquot_initialize(dir);
- inode = ext2_new_inode (dir, mode);
+ inode = ext2_new_inode (dir, mode, &dentry->d_name);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
@@ -159,7 +159,7 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
dquot_initialize(dir);
- inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO);
+ inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO, &dentry->d_name);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out;
@@ -230,7 +230,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
inode_inc_link_count(dir);
- inode = ext2_new_inode (dir, S_IFDIR | mode);
+ inode = ext2_new_inode(dir, S_IFDIR | mode, &dentry->d_name);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_dir;
@@ -344,7 +344,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -356,12 +355,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
if (new_dir->i_nlink >= EXT2_LINK_MAX)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = ext2_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
@@ -369,12 +365,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
/*
* Like most other Unix systems, set the ctime for inodes on a
* rename.
- * inode_dec_link_count() will mark the inode dirty.
*/
old_inode->i_ctime = CURRENT_TIME_SEC;
+ mark_inode_dirty(old_inode);
ext2_delete_entry (old_de, old_page);
- inode_dec_link_count(old_inode);
if (dir_de) {
if (old_dir != new_dir)
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index a1a1c218461..5e41cccff76 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -116,9 +116,11 @@ exit_ext2_xattr(void)
# endif /* CONFIG_EXT2_FS_XATTR */
#ifdef CONFIG_EXT2_FS_SECURITY
-extern int ext2_init_security(struct inode *inode, struct inode *dir);
+extern int ext2_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr);
#else
-static inline int ext2_init_security(struct inode *inode, struct inode *dir)
+static inline int ext2_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
return 0;
}
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 3004e15d5da..5d979b4347b 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -47,14 +47,15 @@ ext2_xattr_security_set(struct dentry *dentry, const char *name,
}
int
-ext2_init_security(struct inode *inode, struct inode *dir)
+ext2_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int err;
size_t len;
void *value;
char *name;
- err = security_inode_init_security(inode, dir, &name, &value, &len);
+ err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 9724aef2246..bfc2dc43681 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -404,7 +404,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
* For other inodes, search forward from the parent directory's block
* group to find a free inode.
*/
-struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
+struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
+ const struct qstr *qstr, int mode)
{
struct super_block *sb;
struct buffer_head *bitmap_bh = NULL;
@@ -589,7 +590,7 @@ got:
if (err)
goto fail_free_drop;
- err = ext3_init_security(handle,inode, dir);
+ err = ext3_init_security(handle, inode, dir, qstr);
if (err)
goto fail_free_drop;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b27ba71810e..0521a007ae6 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1710,7 +1710,7 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
- inode = ext3_new_inode (handle, dir, mode);
+ inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
inode->i_op = &ext3_file_inode_operations;
@@ -1746,7 +1746,7 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
- inode = ext3_new_inode (handle, dir, mode);
+ inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
@@ -1784,7 +1784,7 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
- inode = ext3_new_inode (handle, dir, S_IFDIR | mode);
+ inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFDIR | mode);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
@@ -2206,7 +2206,7 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
- inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
+ inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFLNK|S_IRWXUGO);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
@@ -2253,13 +2253,6 @@ static int ext3_link (struct dentry * old_dentry,
dquot_initialize(dir);
- /*
- * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
- * otherwise has the potential to corrupt the orphan inode list.
- */
- if (inode->i_nlink == 0)
- return -ENOENT;
-
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 85c8cc8f247..9cc19a1dea8 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1936,6 +1936,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
sb->s_qcop = &ext3_qctl_operations;
sb->dq_op = &ext3_quota_operations;
#endif
+ memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
mutex_init(&sbi->s_resize_lock);
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index 377fe720116..2be4f69bfa6 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -128,10 +128,10 @@ exit_ext3_xattr(void)
#ifdef CONFIG_EXT3_FS_SECURITY
extern int ext3_init_security(handle_t *handle, struct inode *inode,
- struct inode *dir);
+ struct inode *dir, const struct qstr *qstr);
#else
static inline int ext3_init_security(handle_t *handle, struct inode *inode,
- struct inode *dir)
+ struct inode *dir, const struct qstr *qstr)
{
return 0;
}
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 03a99bfc59f..b8d9f83aa5c 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -49,14 +49,15 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
}
int
-ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
+ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int err;
size_t len;
void *value;
char *name;
- err = security_inode_init_security(inode, dir, &name, &value, &len);
+ err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index eb9097aec6f..78b79e1bd7e 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1042,7 +1042,7 @@ got:
if (err)
goto fail_free_drop;
- err = ext4_init_security(handle, inode, dir);
+ err = ext4_init_security(handle, inode, dir, qstr);
if (err)
goto fail_free_drop;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5485390d32c..e781b7ea563 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2304,13 +2304,6 @@ static int ext4_link(struct dentry *old_dentry,
dquot_initialize(dir);
- /*
- * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
- * otherwise has the potential to corrupt the orphan inode list.
- */
- if (inode->i_nlink == 0)
- return -ENOENT;
-
retry:
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f6a318f836b..203f9e4a70b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3415,6 +3415,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_qcop = &ext4_qctl_operations;
sb->dq_op = &ext4_quota_operations;
#endif
+ memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
+
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
mutex_init(&sbi->s_resize_lock);
@@ -3509,7 +3511,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
no_journal:
- EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
+ /*
+ * The maximum number of concurrent works can be high and
+ * concurrency isn't really necessary. Limit it to 1.
+ */
+ EXT4_SB(sb)->dio_unwritten_wq =
+ alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1);
if (!EXT4_SB(sb)->dio_unwritten_wq) {
printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
goto failed_mount_wq;
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 1ef16520b95..25b7387ff18 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -145,10 +145,10 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
#ifdef CONFIG_EXT4_FS_SECURITY
extern int ext4_init_security(handle_t *handle, struct inode *inode,
- struct inode *dir);
+ struct inode *dir, const struct qstr *qstr);
#else
static inline int ext4_init_security(handle_t *handle, struct inode *inode,
- struct inode *dir)
+ struct inode *dir, const struct qstr *qstr)
{
return 0;
}
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 9b21268e121..007c3bfbf09 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -49,14 +49,15 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
}
int
-ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
+ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int err;
size_t len;
void *value;
char *name;
- err = security_inode_init_security(inode, dir, &name, &value, &len);
+ err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 86753fe10bd..0e277ec4b61 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -757,8 +757,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
struct inode *inode = de->d_inode;
u32 ipos_h, ipos_m, ipos_l;
- if (len < 5)
+ if (len < 5) {
+ *lenp = 5;
return 255; /* no room */
+ }
ipos_h = MSDOS_I(inode)->i_pos >> 8;
ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index f88f752babd..adae3fb7451 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,7 +43,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/* This is not negative dentry. Always valid. */
@@ -54,7 +54,7 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/*
diff --git a/fs/fcntl.c b/fs/fcntl.c
index cb1026181bd..6c82e5bac03 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
SYSCALL_DEFINE1(dup, unsigned int, fildes)
{
int ret = -EBADF;
- struct file *file = fget(fildes);
+ struct file *file = fget_raw(fildes);
if (file) {
ret = get_unused_fd();
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
return err;
}
+static int check_fcntl_cmd(unsigned cmd)
+{
+ switch (cmd) {
+ case F_DUPFD:
+ case F_DUPFD_CLOEXEC:
+ case F_GETFD:
+ case F_SETFD:
+ case F_GETFL:
+ return 1;
+ }
+ return 0;
+}
+
SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
struct file *filp;
long err = -EBADF;
- filp = fget(fd);
+ filp = fget_raw(fd);
if (!filp)
goto out;
+ if (unlikely(filp->f_mode & FMODE_PATH)) {
+ if (!check_fcntl_cmd(cmd)) {
+ fput(filp);
+ goto out;
+ }
+ }
+
err = security_file_fcntl(filp, cmd, arg);
if (err) {
fput(filp);
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
long err;
err = -EBADF;
- filp = fget(fd);
+ filp = fget_raw(fd);
if (!filp)
goto out;
+ if (unlikely(filp->f_mode & FMODE_PATH)) {
+ if (!check_fcntl_cmd(cmd)) {
+ fput(filp);
+ goto out;
+ }
+ }
+
err = security_file_fcntl(filp, cmd, arg);
if (err) {
fput(filp);
@@ -808,14 +835,14 @@ static int __init fcntl_init(void)
* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
* is defined as O_NONBLOCK on some platforms and not on others.
*/
- BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+ BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
O_RDONLY | O_WRONLY | O_RDWR |
O_CREAT | O_EXCL | O_NOCTTY |
O_TRUNC | O_APPEND | /* O_NONBLOCK | */
__O_SYNC | O_DSYNC | FASYNC |
O_DIRECT | O_LARGEFILE | O_DIRECTORY |
O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
- __FMODE_EXEC
+ __FMODE_EXEC | O_PATH
));
fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fhandle.c b/fs/fhandle.c
new file mode 100644
index 00000000000..bf93ad2bee0
--- /dev/null
+++ b/fs/fhandle.c
@@ -0,0 +1,265 @@
+#include <linux/syscalls.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/exportfs.h>
+#include <linux/fs_struct.h>
+#include <linux/fsnotify.h>
+#include <asm/uaccess.h>
+#include "internal.h"
+
+static long do_sys_name_to_handle(struct path *path,
+ struct file_handle __user *ufh,
+ int __user *mnt_id)
+{
+ long retval;
+ struct file_handle f_handle;
+ int handle_dwords, handle_bytes;
+ struct file_handle *handle = NULL;
+
+ /*
+ * We need t make sure wether the file system
+ * support decoding of the file handle
+ */
+ if (!path->mnt->mnt_sb->s_export_op ||
+ !path->mnt->mnt_sb->s_export_op->fh_to_dentry)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
+ return -EFAULT;
+
+ if (f_handle.handle_bytes > MAX_HANDLE_SZ)
+ return -EINVAL;
+
+ handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
+ GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ /* convert handle size to multiple of sizeof(u32) */
+ handle_dwords = f_handle.handle_bytes >> 2;
+
+ /* we ask for a non connected handle */
+ retval = exportfs_encode_fh(path->dentry,
+ (struct fid *)handle->f_handle,
+ &handle_dwords, 0);
+ handle->handle_type = retval;
+ /* convert handle size to bytes */
+ handle_bytes = handle_dwords * sizeof(u32);
+ handle->handle_bytes = handle_bytes;
+ if ((handle->handle_bytes > f_handle.handle_bytes) ||
+ (retval == 255) || (retval == -ENOSPC)) {
+ /* As per old exportfs_encode_fh documentation
+ * we could return ENOSPC to indicate overflow
+ * But file system returned 255 always. So handle
+ * both the values
+ */
+ /*
+ * set the handle size to zero so we copy only
+ * non variable part of the file_handle
+ */
+ handle_bytes = 0;
+ retval = -EOVERFLOW;
+ } else
+ retval = 0;
+ /* copy the mount id */
+ if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
+ copy_to_user(ufh, handle,
+ sizeof(struct file_handle) + handle_bytes))
+ retval = -EFAULT;
+ kfree(handle);
+ return retval;
+}
+
+/**
+ * sys_name_to_handle_at: convert name to handle
+ * @dfd: directory relative to which name is interpreted if not absolute
+ * @name: name that should be converted to handle.
+ * @handle: resulting file handle
+ * @mnt_id: mount id of the file system containing the file
+ * @flag: flag value to indicate whether to follow symlink or not
+ *
+ * @handle->handle_size indicate the space available to store the
+ * variable part of the file handle in bytes. If there is not
+ * enough space, the field is updated to return the minimum
+ * value required.
+ */
+SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
+ struct file_handle __user *, handle, int __user *, mnt_id,
+ int, flag)
+{
+ struct path path;
+ int lookup_flags;
+ int err;
+
+ if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
+ return -EINVAL;
+
+ lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
+ if (flag & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+ err = user_path_at(dfd, name, lookup_flags, &path);
+ if (!err) {
+ err = do_sys_name_to_handle(&path, handle, mnt_id);
+ path_put(&path);
+ }
+ return err;
+}
+
+static struct vfsmount *get_vfsmount_from_fd(int fd)
+{
+ struct path path;
+
+ if (fd == AT_FDCWD) {
+ struct fs_struct *fs = current->fs;
+ spin_lock(&fs->lock);
+ path = fs->pwd;
+ mntget(path.mnt);
+ spin_unlock(&fs->lock);
+ } else {
+ int fput_needed;
+ struct file *file = fget_light(fd, &fput_needed);
+ if (!file)
+ return ERR_PTR(-EBADF);
+ path = file->f_path;
+ mntget(path.mnt);
+ fput_light(file, fput_needed);
+ }
+ return path.mnt;
+}
+
+static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
+{
+ return 1;
+}
+
+static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
+ struct path *path)
+{
+ int retval = 0;
+ int handle_dwords;
+
+ path->mnt = get_vfsmount_from_fd(mountdirfd);
+ if (IS_ERR(path->mnt)) {
+ retval = PTR_ERR(path->mnt);
+ goto out_err;
+ }
+ /* change the handle size to multiple of sizeof(u32) */
+ handle_dwords = handle->handle_bytes >> 2;
+ path->dentry = exportfs_decode_fh(path->mnt,
+ (struct fid *)handle->f_handle,
+ handle_dwords, handle->handle_type,
+ vfs_dentry_acceptable, NULL);
+ if (IS_ERR(path->dentry)) {
+ retval = PTR_ERR(path->dentry);
+ goto out_mnt;
+ }
+ return 0;
+out_mnt:
+ mntput(path->mnt);
+out_err:
+ return retval;
+}
+
+static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
+ struct path *path)
+{
+ int retval = 0;
+ struct file_handle f_handle;
+ struct file_handle *handle = NULL;
+
+ /*
+ * With handle we don't look at the execute bit on the
+ * the directory. Ideally we would like CAP_DAC_SEARCH.
+ * But we don't have that
+ */
+ if (!capable(CAP_DAC_READ_SEARCH)) {
+ retval = -EPERM;
+ goto out_err;
+ }
+ if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
+ retval = -EFAULT;
+ goto out_err;
+ }
+ if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
+ (f_handle.handle_bytes == 0)) {
+ retval = -EINVAL;
+ goto out_err;
+ }
+ handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
+ GFP_KERNEL);
+ if (!handle) {
+ retval = -ENOMEM;
+ goto out_err;
+ }
+ /* copy the full handle */
+ if (copy_from_user(handle, ufh,
+ sizeof(struct file_handle) +
+ f_handle.handle_bytes)) {
+ retval = -EFAULT;
+ goto out_handle;
+ }
+
+ retval = do_handle_to_path(mountdirfd, handle, path);
+
+out_handle:
+ kfree(handle);
+out_err:
+ return retval;
+}
+
+long do_handle_open(int mountdirfd,
+ struct file_handle __user *ufh, int open_flag)
+{
+ long retval = 0;
+ struct path path;
+ struct file *file;
+ int fd;
+
+ retval = handle_to_path(mountdirfd, ufh, &path);
+ if (retval)
+ return retval;
+
+ fd = get_unused_fd_flags(open_flag);
+ if (fd < 0) {
+ path_put(&path);
+ return fd;
+ }
+ file = file_open_root(path.dentry, path.mnt, "", open_flag);
+ if (IS_ERR(file)) {
+ put_unused_fd(fd);
+ retval = PTR_ERR(file);
+ } else {
+ retval = fd;
+ fsnotify_open(file);
+ fd_install(fd, file);
+ }
+ path_put(&path);
+ return retval;
+}
+
+/**
+ * sys_open_by_handle_at: Open the file handle
+ * @mountdirfd: directory file descriptor
+ * @handle: file handle to be opened
+ * @flag: open flags.
+ *
+ * @mountdirfd indicate the directory file descriptor
+ * of the mount point. file handle is decoded relative
+ * to the vfsmount pointed by the @mountdirfd. @flags
+ * value is same as the open(2) flags.
+ */
+SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
+ struct file_handle __user *, handle,
+ int, flags)
+{
+ long ret;
+
+ if (force_o_largefile())
+ flags |= O_LARGEFILE;
+
+ ret = do_handle_open(mountdirfd, handle, flags);
+ return ret;
+}
diff --git a/fs/file_table.c b/fs/file_table.c
index eb36b6b17e2..01e4c1e8e6b 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -190,7 +190,8 @@ struct file *alloc_file(struct path *path, fmode_t mode,
file_take_write(file);
WARN_ON(mnt_clone_write(path->mnt));
}
- ima_counts_get(file);
+ if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+ i_readcount_inc(path->dentry->d_inode);
return file;
}
EXPORT_SYMBOL(alloc_file);
@@ -246,11 +247,15 @@ static void __fput(struct file *file)
file->f_op->release(inode, file);
security_file_free(file);
ima_file_free(file);
- if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
+ if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
+ !(file->f_mode & FMODE_PATH))) {
cdev_put(inode->i_cdev);
+ }
fops_put(file->f_op);
put_pid(file->f_owner.pid);
file_sb_list_del(file);
+ if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+ i_readcount_dec(inode);
if (file->f_mode & FMODE_WRITE)
drop_file_write_access(file);
file->f_path.dentry = NULL;
@@ -276,11 +281,10 @@ struct file *fget(unsigned int fd)
rcu_read_lock();
file = fcheck_files(files, fd);
if (file) {
- if (!atomic_long_inc_not_zero(&file->f_count)) {
- /* File object ref couldn't be taken */
- rcu_read_unlock();
- return NULL;
- }
+ /* File object ref couldn't be taken */
+ if (file->f_mode & FMODE_PATH ||
+ !atomic_long_inc_not_zero(&file->f_count))
+ file = NULL;
}
rcu_read_unlock();
@@ -289,6 +293,25 @@ struct file *fget(unsigned int fd)
EXPORT_SYMBOL(fget);
+struct file *fget_raw(unsigned int fd)
+{
+ struct file *file;
+ struct files_struct *files = current->files;
+
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ /* File object ref couldn't be taken */
+ if (!atomic_long_inc_not_zero(&file->f_count))
+ file = NULL;
+ }
+ rcu_read_unlock();
+
+ return file;
+}
+
+EXPORT_SYMBOL(fget_raw);
+
/*
* Lightweight file lookup - no refcnt increment if fd table isn't shared.
*
@@ -313,6 +336,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
*fput_needed = 0;
if (atomic_read(&files->count) == 1) {
file = fcheck_files(files, fd);
+ if (file && (file->f_mode & FMODE_PATH))
+ file = NULL;
+ } else {
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ if (!(file->f_mode & FMODE_PATH) &&
+ atomic_long_inc_not_zero(&file->f_count))
+ *fput_needed = 1;
+ else
+ /* Didn't get the reference, someone's freed */
+ file = NULL;
+ }
+ rcu_read_unlock();
+ }
+
+ return file;
+}
+
+struct file *fget_raw_light(unsigned int fd, int *fput_needed)
+{
+ struct file *file;
+ struct files_struct *files = current->files;
+
+ *fput_needed = 0;
+ if (atomic_read(&files->count) == 1) {
+ file = fcheck_files(files, fd);
} else {
rcu_read_lock();
file = fcheck_files(files, fd);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bfed8447ed8..8bd0ef9286c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -158,7 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
{
struct inode *inode;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = entry->d_inode;
@@ -1283,8 +1283,11 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
if (err)
return err;
- if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
- return 0;
+ if (attr->ia_valid & ATTR_OPEN) {
+ if (fc->atomic_o_trunc)
+ return 0;
+ file = NULL;
+ }
if (attr->ia_valid & ATTR_SIZE)
is_truncate = true;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 95da1bc1c82..9e0832dbb1e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,18 +86,52 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
return ff;
}
+static void fuse_release_async(struct work_struct *work)
+{
+ struct fuse_req *req;
+ struct fuse_conn *fc;
+ struct path path;
+
+ req = container_of(work, struct fuse_req, misc.release.work);
+ path = req->misc.release.path;
+ fc = get_fuse_conn(path.dentry->d_inode);
+
+ fuse_put_request(fc, req);
+ path_put(&path);
+}
+
static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
- path_put(&req->misc.release.path);
+ if (fc->destroy_req) {
+ /*
+ * If this is a fuseblk mount, then it's possible that
+ * releasing the path will result in releasing the
+ * super block and sending the DESTROY request. If
+ * the server is single threaded, this would hang.
+ * For this reason do the path_put() in a separate
+ * thread.
+ */
+ atomic_inc(&req->count);
+ INIT_WORK(&req->misc.release.work, fuse_release_async);
+ schedule_work(&req->misc.release.work);
+ } else {
+ path_put(&req->misc.release.path);
+ }
}
-static void fuse_file_put(struct fuse_file *ff)
+static void fuse_file_put(struct fuse_file *ff, bool sync)
{
if (atomic_dec_and_test(&ff->count)) {
struct fuse_req *req = ff->reserved_req;
- req->end = fuse_release_end;
- fuse_request_send_background(ff->fc, req);
+ if (sync) {
+ fuse_request_send(ff->fc, req);
+ path_put(&req->misc.release.path);
+ fuse_put_request(ff->fc, req);
+ } else {
+ req->end = fuse_release_end;
+ fuse_request_send_background(ff->fc, req);
+ }
kfree(ff);
}
}
@@ -219,8 +253,12 @@ void fuse_release_common(struct file *file, int opcode)
* Normally this will send the RELEASE request, however if
* some asynchronous READ or WRITE requests are outstanding,
* the sending will be delayed.
+ *
+ * Make the release synchronous if this is a fuseblk mount,
+ * synchronous RELEASE is allowed (and desirable) in this case
+ * because the server can be trusted not to screw up.
*/
- fuse_file_put(ff);
+ fuse_file_put(ff, ff->fc->destroy_req != NULL);
}
static int fuse_open(struct inode *inode, struct file *file)
@@ -558,7 +596,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
page_cache_release(page);
}
if (req->ff)
- fuse_file_put(req->ff);
+ fuse_file_put(req->ff, false);
}
static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1137,7 +1175,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
{
__free_page(req->pages[0]);
- fuse_file_put(req->ff);
+ fuse_file_put(req->ff, false);
}
static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ae5744a2f9e..d4286947bc2 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
#include <linux/rwsem.h>
#include <linux/rbtree.h>
#include <linux/poll.h>
+#include <linux/workqueue.h>
/** Max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32
@@ -262,7 +263,10 @@ struct fuse_req {
/** Data for asynchronous requests */
union {
struct {
- struct fuse_release_in in;
+ union {
+ struct fuse_release_in in;
+ struct work_struct work;
+ };
struct path path;
} release;
struct fuse_init_in init_in;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9e3f68cc1bd..051b1a08452 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -637,8 +637,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
u64 nodeid;
u32 generation;
- if (*max_len < len)
+ if (*max_len < len) {
+ *max_len = len;
return 255;
+ }
nodeid = get_fuse_inode(inode)->nodeid;
generation = inode->i_generation;
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7118f1a780a..cbc07155b1a 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -80,8 +80,11 @@ int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
struct posix_acl *acl;
int error;
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ return -EAGAIN;
+ }
acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
if (IS_ERR(acl))
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4f36f8832b9..aad77e4f61b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -695,6 +695,7 @@ out:
if (error == 0)
return 0;
+ unlock_page(page);
page_cache_release(page);
gfs2_trans_end(sdp);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3c4039d5eef..ef3dc4b9fae 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -21,6 +21,7 @@
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
+#include "super.h"
#include "trans.h"
#include "dir.h"
#include "util.h"
@@ -757,7 +758,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrp_list rlist;
u64 bn, bstart;
- u32 blen;
+ u32 blen, btotal;
__be64 *p;
unsigned int rg_blocks = 0;
int metadata;
@@ -839,6 +840,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
bstart = 0;
blen = 0;
+ btotal = 0;
for (p = top; p < bottom; p++) {
if (!*p)
@@ -851,9 +853,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
else {
if (bstart) {
if (metadata)
- gfs2_free_meta(ip, bstart, blen);
+ __gfs2_free_meta(ip, bstart, blen);
else
- gfs2_free_data(ip, bstart, blen);
+ __gfs2_free_data(ip, bstart, blen);
+
+ btotal += blen;
}
bstart = bn;
@@ -865,11 +869,17 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
}
if (bstart) {
if (metadata)
- gfs2_free_meta(ip, bstart, blen);
+ __gfs2_free_meta(ip, bstart, blen);
else
- gfs2_free_data(ip, bstart, blen);
+ __gfs2_free_data(ip, bstart, blen);
+
+ btotal += blen;
}
+ gfs2_statfs_change(sdp, 0, +btotal, 0);
+ gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
+ ip->i_inode.i_gid);
+
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 4a456338b87..0da8da2c991 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
int error;
int had_lock = 0;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
parent = dget_parent(dentry);
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9023db8184f..b5a5e60df0d 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
struct super_block *sb = inode->i_sb;
struct gfs2_inode *ip = GFS2_I(inode);
- if (*len < GFS2_SMALL_FH_SIZE ||
- (connectable && *len < GFS2_LARGE_FH_SIZE))
+ if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
+ *len = GFS2_LARGE_FH_SIZE;
return 255;
+ } else if (*len < GFS2_SMALL_FH_SIZE) {
+ *len = GFS2_SMALL_FH_SIZE;
+ return 255;
+ }
fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 7cfdcb91336..4074b952b05 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -448,15 +448,20 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
- if (!(file->f_flags & O_NOATIME)) {
+ if (!(file->f_flags & O_NOATIME) &&
+ !IS_NOATIME(&ip->i_inode)) {
struct gfs2_holder i_gh;
int error;
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
error = gfs2_glock_nq(&i_gh);
- file_accessed(file);
- if (error == 0)
- gfs2_glock_dq_uninit(&i_gh);
+ if (error == 0) {
+ file_accessed(file);
+ gfs2_glock_dq(&i_gh);
+ }
+ gfs2_holder_uninit(&i_gh);
+ if (error)
+ return error;
}
vma->vm_ops = &gfs2_vm_ops;
vma->vm_flags |= VM_CAN_NONLINEAR;
@@ -617,8 +622,7 @@ static void empty_write_end(struct page *page, unsigned from,
{
struct gfs2_inode *ip = GFS2_I(page->mapping->host);
- page_zero_new_buffers(page, from, to);
- flush_dcache_page(page);
+ zero_user(page, from, to-from);
mark_page_accessed(page);
if (!gfs2_is_writeback(ip))
@@ -627,36 +631,43 @@ static void empty_write_end(struct page *page, unsigned from,
block_commit_write(page, from, to);
}
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+static int needs_empty_write(sector_t block, struct inode *inode)
{
- unsigned start, end, next;
- struct buffer_head *bh, *head;
int error;
+ struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
- if (!page_has_buffers(page)) {
- error = __block_write_begin(page, from, to - from, gfs2_block_map);
- if (unlikely(error))
- return error;
+ bh_map.b_size = 1 << inode->i_blkbits;
+ error = gfs2_block_map(inode, block, &bh_map, 0);
+ if (unlikely(error))
+ return error;
+ return !buffer_mapped(&bh_map);
+}
- empty_write_end(page, from, to);
- return 0;
- }
+static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+{
+ struct inode *inode = page->mapping->host;
+ unsigned start, end, next, blksize;
+ sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ int ret;
- bh = head = page_buffers(page);
+ blksize = 1 << inode->i_blkbits;
next = end = 0;
while (next < from) {
- next += bh->b_size;
- bh = bh->b_this_page;
+ next += blksize;
+ block++;
}
start = next;
do {
- next += bh->b_size;
- if (buffer_mapped(bh)) {
+ next += blksize;
+ ret = needs_empty_write(block, inode);
+ if (unlikely(ret < 0))
+ return ret;
+ if (ret == 0) {
if (end) {
- error = __block_write_begin(page, start, end - start,
- gfs2_block_map);
- if (unlikely(error))
- return error;
+ ret = __block_write_begin(page, start, end - start,
+ gfs2_block_map);
+ if (unlikely(ret))
+ return ret;
empty_write_end(page, start, end);
end = 0;
}
@@ -664,13 +675,13 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
}
else
end = next;
- bh = bh->b_this_page;
+ block++;
} while (next < to);
if (end) {
- error = __block_write_begin(page, start, end - start, gfs2_block_map);
- if (unlikely(error))
- return error;
+ ret = __block_write_begin(page, start, end - start, gfs2_block_map);
+ if (unlikely(ret))
+ return ret;
empty_write_end(page, start, end);
}
@@ -976,8 +987,10 @@ static void do_unflock(struct file *file, struct file_lock *fl)
mutex_lock(&fp->f_fl_mutex);
flock_lock_file_wait(file, fl);
- if (fl_gh->gh_gl)
- gfs2_glock_dq_uninit(fl_gh);
+ if (fl_gh->gh_gl) {
+ gfs2_glock_dq_wait(fl_gh);
+ gfs2_holder_uninit(fl_gh);
+ }
mutex_unlock(&fp->f_fl_mutex);
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 7cd9a5a68d5..e2431313491 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -26,6 +26,9 @@
#include <linux/freezer.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist_bl.h>
+#include <linux/bit_spinlock.h>
#include "gfs2.h"
#include "incore.h"
@@ -41,10 +44,6 @@
#define CREATE_TRACE_POINTS
#include "trace_gfs2.h"
-struct gfs2_gl_hash_bucket {
- struct hlist_head hb_list;
-};
-
struct gfs2_glock_iter {
int hash; /* hash bucket index */
struct gfs2_sbd *sdp; /* incore superblock */
@@ -54,7 +53,6 @@ struct gfs2_glock_iter {
typedef void (*glock_examiner) (struct gfs2_glock * gl);
-static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0)
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
@@ -70,57 +68,9 @@ static DEFINE_SPINLOCK(lru_lock);
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
-static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE];
+static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
static struct dentry *gfs2_root;
-/*
- * Despite what you might think, the numbers below are not arbitrary :-)
- * They are taken from the ipv4 routing hash code, which is well tested
- * and thus should be nearly optimal. Later on we might tweek the numbers
- * but for now this should be fine.
- *
- * The reason for putting the locks in a separate array from the list heads
- * is that we can have fewer locks than list heads and save memory. We use
- * the same hash function for both, but with a different hash mask.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
- defined(CONFIG_PROVE_LOCKING)
-
-#ifdef CONFIG_LOCKDEP
-# define GL_HASH_LOCK_SZ 256
-#else
-# if NR_CPUS >= 32
-# define GL_HASH_LOCK_SZ 4096
-# elif NR_CPUS >= 16
-# define GL_HASH_LOCK_SZ 2048
-# elif NR_CPUS >= 8
-# define GL_HASH_LOCK_SZ 1024
-# elif NR_CPUS >= 4
-# define GL_HASH_LOCK_SZ 512
-# else
-# define GL_HASH_LOCK_SZ 256
-# endif
-#endif
-
-/* We never want more locks than chains */
-#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ
-# undef GL_HASH_LOCK_SZ
-# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE
-#endif
-
-static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ];
-
-static inline rwlock_t *gl_lock_addr(unsigned int x)
-{
- return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
-}
-#else /* not SMP, so no spinlocks required */
-static inline rwlock_t *gl_lock_addr(unsigned int x)
-{
- return NULL;
-}
-#endif
-
/**
* gl_hash() - Turn glock number into hash bucket number
* @lock: The glock number
@@ -141,25 +91,35 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp,
return h;
}
-/**
- * glock_free() - Perform a few checks and then release struct gfs2_glock
- * @gl: The glock to release
- *
- * Also calls lock module to release its internal structure for this glock.
- *
- */
+static inline void spin_lock_bucket(unsigned int hash)
+{
+ struct hlist_bl_head *bl = &gl_hash_table[hash];
+ bit_spin_lock(0, (unsigned long *)bl);
+}
-static void glock_free(struct gfs2_glock *gl)
+static inline void spin_unlock_bucket(unsigned int hash)
+{
+ struct hlist_bl_head *bl = &gl_hash_table[hash];
+ __bit_spin_unlock(0, (unsigned long *)bl);
+}
+
+static void gfs2_glock_dealloc(struct rcu_head *rcu)
+{
+ struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
+
+ if (gl->gl_ops->go_flags & GLOF_ASPACE)
+ kmem_cache_free(gfs2_glock_aspace_cachep, gl);
+ else
+ kmem_cache_free(gfs2_glock_cachep, gl);
+}
+
+void gfs2_glock_free(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
- struct address_space *mapping = gfs2_glock2aspace(gl);
- struct kmem_cache *cachep = gfs2_glock_cachep;
- GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
- trace_gfs2_glock_put(gl);
- if (mapping)
- cachep = gfs2_glock_aspace_cachep;
- sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl);
+ call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
+ if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+ wake_up(&sdp->sd_glock_wait);
}
/**
@@ -185,34 +145,49 @@ static int demote_ok(const struct gfs2_glock *gl)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
+ /* assert_spin_locked(&gl->gl_spin); */
+
if (gl->gl_state == LM_ST_UNLOCKED)
return 0;
- if (!list_empty(&gl->gl_holders))
+ if (test_bit(GLF_LFLUSH, &gl->gl_flags))
+ return 0;
+ if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
+ !list_empty(&gl->gl_holders))
return 0;
if (glops->go_demote_ok)
return glops->go_demote_ok(gl);
return 1;
}
+
/**
- * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
+ * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
* @gl: the glock
*
+ * If the glock is demotable, then we add it (or move it) to the end
+ * of the glock LRU list.
*/
-static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
+static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
{
- int may_reclaim;
- may_reclaim = (demote_ok(gl) &&
- (atomic_read(&gl->gl_ref) == 1 ||
- (gl->gl_name.ln_type == LM_TYPE_INODE &&
- atomic_read(&gl->gl_ref) <= 2)));
- spin_lock(&lru_lock);
- if (list_empty(&gl->gl_lru) && may_reclaim) {
+ if (demote_ok(gl)) {
+ spin_lock(&lru_lock);
+
+ if (!list_empty(&gl->gl_lru))
+ list_del_init(&gl->gl_lru);
+ else
+ atomic_inc(&lru_count);
+
list_add_tail(&gl->gl_lru, &lru_list);
- atomic_inc(&lru_count);
+ spin_unlock(&lru_lock);
}
- spin_unlock(&lru_lock);
+}
+
+void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
+{
+ spin_lock(&gl->gl_spin);
+ __gfs2_glock_schedule_for_reclaim(gl);
+ spin_unlock(&gl->gl_spin);
}
/**
@@ -227,7 +202,6 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
{
if (atomic_dec_and_test(&gl->gl_ref))
GLOCK_BUG_ON(gl, 1);
- gfs2_glock_schedule_for_reclaim(gl);
}
/**
@@ -236,30 +210,26 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
*
*/
-int gfs2_glock_put(struct gfs2_glock *gl)
+void gfs2_glock_put(struct gfs2_glock *gl)
{
- int rv = 0;
+ struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct address_space *mapping = gfs2_glock2aspace(gl);
- write_lock(gl_lock_addr(gl->gl_hash));
- if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) {
- hlist_del(&gl->gl_list);
+ if (atomic_dec_and_test(&gl->gl_ref)) {
+ spin_lock_bucket(gl->gl_hash);
+ hlist_bl_del_rcu(&gl->gl_list);
+ spin_unlock_bucket(gl->gl_hash);
+ spin_lock(&lru_lock);
if (!list_empty(&gl->gl_lru)) {
list_del_init(&gl->gl_lru);
atomic_dec(&lru_count);
}
spin_unlock(&lru_lock);
- write_unlock(gl_lock_addr(gl->gl_hash));
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
- glock_free(gl);
- rv = 1;
- goto out;
+ GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
+ trace_gfs2_glock_put(gl);
+ sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}
- spin_lock(&gl->gl_spin);
- gfs2_glock_schedule_for_reclaim(gl);
- spin_unlock(&gl->gl_spin);
- write_unlock(gl_lock_addr(gl->gl_hash));
-out:
- return rv;
}
/**
@@ -275,17 +245,15 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
const struct lm_lockname *name)
{
struct gfs2_glock *gl;
- struct hlist_node *h;
+ struct hlist_bl_node *h;
- hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) {
+ hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
if (!lm_name_equal(&gl->gl_name, name))
continue;
if (gl->gl_sbd != sdp)
continue;
-
- atomic_inc(&gl->gl_ref);
-
- return gl;
+ if (atomic_inc_not_zero(&gl->gl_ref))
+ return gl;
}
return NULL;
@@ -743,10 +711,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
struct gfs2_glock *gl, *tmp;
unsigned int hash = gl_hash(sdp, &name);
struct address_space *mapping;
+ struct kmem_cache *cachep;
- read_lock(gl_lock_addr(hash));
+ rcu_read_lock();
gl = search_bucket(hash, sdp, &name);
- read_unlock(gl_lock_addr(hash));
+ rcu_read_unlock();
*glp = gl;
if (gl)
@@ -755,9 +724,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
return -ENOENT;
if (glops->go_flags & GLOF_ASPACE)
- gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL);
+ cachep = gfs2_glock_aspace_cachep;
else
- gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
+ cachep = gfs2_glock_cachep;
+ gl = kmem_cache_alloc(cachep, GFP_KERNEL);
if (!gl)
return -ENOMEM;
@@ -790,15 +760,16 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping->writeback_index = 0;
}
- write_lock(gl_lock_addr(hash));
+ spin_lock_bucket(hash);
tmp = search_bucket(hash, sdp, &name);
if (tmp) {
- write_unlock(gl_lock_addr(hash));
- glock_free(gl);
+ spin_unlock_bucket(hash);
+ kmem_cache_free(cachep, gl);
+ atomic_dec(&sdp->sd_glock_disposal);
gl = tmp;
} else {
- hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list);
- write_unlock(gl_lock_addr(hash));
+ hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
+ spin_unlock_bucket(hash);
}
*glp = gl;
@@ -1007,13 +978,13 @@ fail:
insert_pt = &gh2->gh_list;
}
set_bit(GLF_QUEUED, &gl->gl_flags);
+ trace_gfs2_glock_queue(gh, 1);
if (likely(insert_pt == NULL)) {
list_add_tail(&gh->gh_list, &gl->gl_holders);
if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
goto do_cancel;
return;
}
- trace_gfs2_glock_queue(gh, 1);
list_add_tail(&gh->gh_list, insert_pt);
do_cancel:
gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
@@ -1113,6 +1084,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
!test_bit(GLF_DEMOTE, &gl->gl_flags))
fast_path = 1;
}
+ __gfs2_glock_schedule_for_reclaim(gl);
trace_gfs2_glock_queue(gh, 0);
spin_unlock(&gl->gl_spin);
if (likely(fast_path))
@@ -1276,10 +1248,8 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
- unsigned int x;
-
- for (x = 0; x < num_gh; x++)
- gfs2_glock_dq(&ghs[x]);
+ while (num_gh--)
+ gfs2_glock_dq(&ghs[num_gh]);
}
/**
@@ -1291,10 +1261,8 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
- unsigned int x;
-
- for (x = 0; x < num_gh; x++)
- gfs2_glock_dq_uninit(&ghs[x]);
+ while (num_gh--)
+ gfs2_glock_dq_uninit(&ghs[num_gh]);
}
void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
@@ -1440,42 +1408,30 @@ static struct shrinker glock_shrinker = {
* @sdp: the filesystem
* @bucket: the bucket
*
- * Returns: 1 if the bucket has entries
*/
-static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
+static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
unsigned int hash)
{
- struct gfs2_glock *gl, *prev = NULL;
- int has_entries = 0;
- struct hlist_head *head = &gl_hash_table[hash].hb_list;
+ struct gfs2_glock *gl;
+ struct hlist_bl_head *head = &gl_hash_table[hash];
+ struct hlist_bl_node *pos;
- read_lock(gl_lock_addr(hash));
- /* Can't use hlist_for_each_entry - don't want prefetch here */
- if (hlist_empty(head))
- goto out;
- gl = list_entry(head->first, struct gfs2_glock, gl_list);
- while(1) {
- if (!sdp || gl->gl_sbd == sdp) {
- gfs2_glock_hold(gl);
- read_unlock(gl_lock_addr(hash));
- if (prev)
- gfs2_glock_put(prev);
- prev = gl;
+ rcu_read_lock();
+ hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
+ if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref))
examiner(gl);
- has_entries = 1;
- read_lock(gl_lock_addr(hash));
- }
- if (gl->gl_list.next == NULL)
- break;
- gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list);
}
-out:
- read_unlock(gl_lock_addr(hash));
- if (prev)
- gfs2_glock_put(prev);
+ rcu_read_unlock();
cond_resched();
- return has_entries;
+}
+
+static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
+{
+ unsigned x;
+
+ for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
+ examine_bucket(examiner, sdp, x);
}
@@ -1529,10 +1485,21 @@ static void clear_glock(struct gfs2_glock *gl)
void gfs2_glock_thaw(struct gfs2_sbd *sdp)
{
- unsigned x;
+ glock_hash_walk(thaw_glock, sdp);
+}
- for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
- examine_bucket(thaw_glock, sdp, x);
+static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
+{
+ int ret;
+ spin_lock(&gl->gl_spin);
+ ret = __dump_glock(seq, gl);
+ spin_unlock(&gl->gl_spin);
+ return ret;
+}
+
+static void dump_glock_func(struct gfs2_glock *gl)
+{
+ dump_glock(NULL, gl);
}
/**
@@ -1545,13 +1512,10 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp)
void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
{
- unsigned int x;
-
- for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
- examine_bucket(clear_glock, sdp, x);
+ glock_hash_walk(clear_glock, sdp);
flush_workqueue(glock_workqueue);
wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
- gfs2_dump_lockstate(sdp);
+ glock_hash_walk(dump_glock_func, sdp);
}
void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
@@ -1717,66 +1681,15 @@ out:
return error;
}
-static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
-{
- int ret;
- spin_lock(&gl->gl_spin);
- ret = __dump_glock(seq, gl);
- spin_unlock(&gl->gl_spin);
- return ret;
-}
-/**
- * gfs2_dump_lockstate - print out the current lockstate
- * @sdp: the filesystem
- * @ub: the buffer to copy the information into
- *
- * If @ub is NULL, dump the lockstate to the console.
- *
- */
-
-static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
-{
- struct gfs2_glock *gl;
- struct hlist_node *h;
- unsigned int x;
- int error = 0;
-
- for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
-
- read_lock(gl_lock_addr(x));
-
- hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) {
- if (gl->gl_sbd != sdp)
- continue;
-
- error = dump_glock(NULL, gl);
- if (error)
- break;
- }
-
- read_unlock(gl_lock_addr(x));
-
- if (error)
- break;
- }
-
-
- return error;
-}
int __init gfs2_glock_init(void)
{
unsigned i;
for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
- INIT_HLIST_HEAD(&gl_hash_table[i].hb_list);
- }
-#ifdef GL_HASH_LOCK_SZ
- for(i = 0; i < GL_HASH_LOCK_SZ; i++) {
- rwlock_init(&gl_hash_locks[i]);
+ INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
}
-#endif
glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
WQ_HIGHPRI | WQ_FREEZABLE, 0);
@@ -1802,62 +1715,54 @@ void gfs2_glock_exit(void)
destroy_workqueue(gfs2_delete_workqueue);
}
+static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
+{
+ return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
+ struct gfs2_glock, gl_list);
+}
+
+static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
+{
+ return hlist_bl_entry(rcu_dereference(gl->gl_list.next),
+ struct gfs2_glock, gl_list);
+}
+
static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
{
struct gfs2_glock *gl;
-restart:
- read_lock(gl_lock_addr(gi->hash));
- gl = gi->gl;
- if (gl) {
- gi->gl = hlist_entry(gl->gl_list.next,
- struct gfs2_glock, gl_list);
- } else {
- gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
- struct gfs2_glock, gl_list);
- }
- if (gi->gl)
- gfs2_glock_hold(gi->gl);
- read_unlock(gl_lock_addr(gi->hash));
- if (gl)
- gfs2_glock_put(gl);
- while (gi->gl == NULL) {
- gi->hash++;
- if (gi->hash >= GFS2_GL_HASH_SIZE)
- return 1;
- read_lock(gl_lock_addr(gi->hash));
- gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
- struct gfs2_glock, gl_list);
- if (gi->gl)
- gfs2_glock_hold(gi->gl);
- read_unlock(gl_lock_addr(gi->hash));
- }
-
- if (gi->sdp != gi->gl->gl_sbd)
- goto restart;
+ do {
+ gl = gi->gl;
+ if (gl) {
+ gi->gl = glock_hash_next(gl);
+ } else {
+ gi->gl = glock_hash_chain(gi->hash);
+ }
+ while (gi->gl == NULL) {
+ gi->hash++;
+ if (gi->hash >= GFS2_GL_HASH_SIZE) {
+ rcu_read_unlock();
+ return 1;
+ }
+ gi->gl = glock_hash_chain(gi->hash);
+ }
+ /* Skip entries for other sb and dead entries */
+ } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
return 0;
}
-static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi)
-{
- if (gi->gl)
- gfs2_glock_put(gi->gl);
- gi->gl = NULL;
-}
-
static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
{
struct gfs2_glock_iter *gi = seq->private;
loff_t n = *pos;
gi->hash = 0;
+ rcu_read_lock();
do {
- if (gfs2_glock_iter_next(gi)) {
- gfs2_glock_iter_free(gi);
+ if (gfs2_glock_iter_next(gi))
return NULL;
- }
} while (n--);
return gi->gl;
@@ -1870,10 +1775,8 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
(*pos)++;
- if (gfs2_glock_iter_next(gi)) {
- gfs2_glock_iter_free(gi);
+ if (gfs2_glock_iter_next(gi))
return NULL;
- }
return gi->gl;
}
@@ -1881,7 +1784,10 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
{
struct gfs2_glock_iter *gi = seq->private;
- gfs2_glock_iter_free(gi);
+
+ if (gi->gl)
+ rcu_read_unlock();
+ gi->gl = NULL;
}
static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 691851ceb61..aea160690e9 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -118,7 +118,7 @@ struct lm_lockops {
int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
void (*lm_unmount) (struct gfs2_sbd *sdp);
void (*lm_withdraw) (struct gfs2_sbd *sdp);
- void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
+ void (*lm_put_lock) (struct gfs2_glock *gl);
int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
unsigned int flags);
void (*lm_cancel) (struct gfs2_glock *gl);
@@ -174,7 +174,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp,
int create, struct gfs2_glock **glp);
void gfs2_glock_hold(struct gfs2_glock *gl);
void gfs2_glock_put_nolock(struct gfs2_glock *gl);
-int gfs2_glock_put(struct gfs2_glock *gl);
+void gfs2_glock_put(struct gfs2_glock *gl);
void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
struct gfs2_holder *gh);
void gfs2_holder_reinit(unsigned int state, unsigned flags,
@@ -223,25 +223,22 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
return error;
}
-/* Lock Value Block functions */
-
-int gfs2_lvb_hold(struct gfs2_glock *gl);
-void gfs2_lvb_unhold(struct gfs2_glock *gl);
-
-void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
-void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
-void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
-void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
-void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
-void gfs2_glock_thaw(struct gfs2_sbd *sdp);
-
-int __init gfs2_glock_init(void);
-void gfs2_glock_exit(void);
-
-int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
-void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
-int gfs2_register_debugfs(void);
-void gfs2_unregister_debugfs(void);
+extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
+extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
+extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
+extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
+extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
+extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
+extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
+extern void gfs2_glock_free(struct gfs2_glock *gl);
+
+extern int __init gfs2_glock_init(void);
+extern void gfs2_glock_exit(void);
+
+extern int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
+extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
+extern int gfs2_register_debugfs(void);
+extern void gfs2_unregister_debugfs(void);
extern const struct lm_lockops gfs2_dlm_ops;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 263561bf1a5..3754e3cbf02 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -56,20 +56,26 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
BUG_ON(current->journal_info);
current->journal_info = &tr;
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata,
bd_ail_gl_list);
bh = bd->bd_bh;
gfs2_remove_from_ail(bd);
+ spin_unlock(&sdp->sd_ail_lock);
+
bd->bd_bh = NULL;
bh->b_private = NULL;
bd->bd_blkno = bh->b_blocknr;
+ gfs2_log_lock(sdp);
gfs2_assert_withdraw(sdp, !buffer_busy(bh));
gfs2_trans_add_revoke(sdp, bd);
+ gfs2_log_unlock(sdp);
+
+ spin_lock(&sdp->sd_ail_lock);
}
gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL);
@@ -206,8 +212,17 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
static int inode_go_demote_ok(const struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_holder *gh;
+
if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
return 0;
+
+ if (!list_empty(&gl->gl_holders)) {
+ gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
+ if (gh->gh_list.next != &gl->gl_holders)
+ return 0;
+ }
+
return 1;
}
@@ -272,19 +287,6 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
}
/**
- * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
- * @gl: the glock
- *
- * Returns: 1 if it's ok
- */
-
-static int rgrp_go_demote_ok(const struct gfs2_glock *gl)
-{
- const struct address_space *mapping = (const struct address_space *)(gl + 1);
- return !mapping->nrpages;
-}
-
-/**
* rgrp_go_lock - operation done after an rgrp lock is locked by
* a first holder on this node.
* @gl: the glock
@@ -410,7 +412,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_xmote_th = rgrp_go_sync,
.go_inval = rgrp_go_inval,
- .go_demote_ok = rgrp_go_demote_ok,
.go_lock = rgrp_go_lock,
.go_unlock = rgrp_go_unlock,
.go_dump = gfs2_rgrp_dump,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a79790c0627..870a89d6d4d 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -15,6 +15,8 @@
#include <linux/workqueue.h>
#include <linux/dlm.h>
#include <linux/buffer_head.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist_bl.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
@@ -201,7 +203,7 @@ enum {
};
struct gfs2_glock {
- struct hlist_node gl_list;
+ struct hlist_bl_node gl_list;
unsigned long gl_flags; /* GLF_... */
struct lm_lockname gl_name;
atomic_t gl_ref;
@@ -234,6 +236,7 @@ struct gfs2_glock {
atomic_t gl_ail_count;
struct delayed_work gl_work;
struct work_struct gl_delete;
+ struct rcu_head gl_rcu;
};
#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
@@ -314,6 +317,7 @@ enum {
QDF_USER = 0,
QDF_CHANGE = 1,
QDF_LOCKED = 2,
+ QDF_REFRESH = 3,
};
struct gfs2_quota_data {
@@ -647,6 +651,7 @@ struct gfs2_sbd {
unsigned int sd_log_flush_head;
u64 sd_log_flush_wrapped;
+ spinlock_t sd_ail_lock;
struct list_head sd_ail1_list;
struct list_head sd_ail2_list;
u64 sd_ail_sync_gen;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7aa7d4f8984..97d54a28776 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -763,14 +763,15 @@ fail:
return error;
}
-static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
+static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
+ const struct qstr *qstr)
{
int err;
size_t len;
void *value;
char *name;
- err = security_inode_init_security(&ip->i_inode, &dip->i_inode,
+ err = security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr,
&name, &value, &len);
if (err) {
@@ -854,7 +855,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
if (error)
goto fail_gunlock2;
- error = gfs2_security_init(dip, GFS2_I(inode));
+ error = gfs2_security_init(dip, GFS2_I(inode), name);
if (error)
goto fail_gunlock2;
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 6e493aee28f..98c80d8c2a6 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -22,7 +22,6 @@ static void gdlm_ast(void *arg)
{
struct gfs2_glock *gl = arg;
unsigned ret = gl->gl_state;
- struct gfs2_sbd *sdp = gl->gl_sbd;
BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
@@ -31,12 +30,7 @@ static void gdlm_ast(void *arg)
switch (gl->gl_lksb.sb_status) {
case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
- if (gl->gl_ops->go_flags & GLOF_ASPACE)
- kmem_cache_free(gfs2_glock_aspace_cachep, gl);
- else
- kmem_cache_free(gfs2_glock_cachep, gl);
- if (atomic_dec_and_test(&sdp->sd_glock_disposal))
- wake_up(&sdp->sd_glock_wait);
+ gfs2_glock_free(gl);
return;
case -DLM_ECANCEL: /* Cancel while getting lock */
ret |= LM_OUT_CANCELED;
@@ -164,16 +158,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
}
-static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
+static void gdlm_put_lock(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
int error;
if (gl->gl_lksb.sb_lkid == 0) {
- kmem_cache_free(cachep, gl);
- if (atomic_dec_and_test(&sdp->sd_glock_disposal))
- wake_up(&sdp->sd_glock_wait);
+ gfs2_glock_free(gl);
return;
}
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index eb01f3575e1..e7ed31f858d 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -67,7 +67,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
* @mapping: The associated mapping (maybe NULL)
* @bd: The gfs2_bufdata to remove
*
- * The log lock _must_ be held when calling this function
+ * The ail lock _must_ be held when calling this function
*
*/
@@ -88,8 +88,8 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
*/
static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
-__releases(&sdp->sd_log_lock)
-__acquires(&sdp->sd_log_lock)
+__releases(&sdp->sd_ail_lock)
+__acquires(&sdp->sd_ail_lock)
{
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;
@@ -117,7 +117,7 @@ __acquires(&sdp->sd_log_lock)
list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
get_bh(bh);
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
@@ -126,7 +126,7 @@ __acquires(&sdp->sd_log_lock)
unlock_buffer(bh);
brelse(bh);
}
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
retry = 1;
break;
@@ -175,10 +175,10 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
struct gfs2_ail *ai;
int done = 0;
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
head = &sdp->sd_ail1_list;
if (list_empty(head)) {
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
return;
}
sync_gen = sdp->sd_ail_sync_gen++;
@@ -189,13 +189,13 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
if (ai->ai_sync_gen >= sync_gen)
continue;
ai->ai_sync_gen = sync_gen;
- gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */
+ gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
done = 0;
break;
}
}
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
}
static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
@@ -203,7 +203,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
struct gfs2_ail *ai, *s;
int ret;
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
if (gfs2_ail1_empty_one(sdp, ai, flags))
@@ -214,7 +214,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
ret = list_empty(&sdp->sd_ail1_list);
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
return ret;
}
@@ -247,7 +247,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
int wrap = (new_tail < old_tail);
int a, b, rm;
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
a = (old_tail <= ai->ai_first);
@@ -263,7 +263,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
kfree(ai);
}
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
}
/**
@@ -421,7 +421,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
struct gfs2_ail *ai;
unsigned int tail;
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
if (list_empty(&sdp->sd_ail1_list)) {
tail = sdp->sd_log_head;
@@ -430,7 +430,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
tail = ai->ai_first;
}
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
return tail;
}
@@ -743,10 +743,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
sdp->sd_log_commited_databuf = 0;
sdp->sd_log_commited_revoke = 0;
+ spin_lock(&sdp->sd_ail_lock);
if (!list_empty(&ai->ai_ail1_list)) {
list_add(&ai->ai_list, &sdp->sd_ail1_list);
ai = NULL;
}
+ spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
trace_gfs2_log_flush(sdp, 0);
up_write(&sdp->sd_log_flush_lock);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index bf33f822058..e919abf25ec 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -51,8 +51,10 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
/* If this buffer is in the AIL and it has already been written
* to in-place disk block, remove it from the AIL.
*/
+ spin_lock(&sdp->sd_ail_lock);
if (bd->bd_ail)
list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
+ spin_unlock(&sdp->sd_ail_lock);
get_bh(bh);
atomic_inc(&sdp->sd_log_pinned);
trace_gfs2_pin(bd, 1);
@@ -80,7 +82,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
mark_buffer_dirty(bh);
clear_buffer_pinned(bh);
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
if (bd->bd_ail) {
list_del(&bd->bd_ail_st_list);
brelse(bh);
@@ -91,9 +93,11 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
}
bd->bd_ail = ai;
list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
- clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
+ spin_unlock(&sdp->sd_ail_lock);
+
+ if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags))
+ gfs2_glock_schedule_for_reclaim(bd->bd_gl);
trace_gfs2_pin(bd, 0);
- gfs2_log_unlock(sdp);
unlock_buffer(bh);
atomic_dec(&sdp->sd_log_pinned);
}
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 85ba027d1c4..888a5f5a1a5 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -14,6 +14,8 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/gfs2_ondisk.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist_bl.h>
#include <asm/atomic.h>
#include "gfs2.h"
@@ -45,7 +47,7 @@ static void gfs2_init_glock_once(void *foo)
{
struct gfs2_glock *gl = foo;
- INIT_HLIST_NODE(&gl->gl_list);
+ INIT_HLIST_BL_NODE(&gl->gl_list);
spin_lock_init(&gl->gl_spin);
INIT_LIST_HEAD(&gl->gl_holders);
INIT_LIST_HEAD(&gl->gl_lru);
@@ -59,14 +61,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
struct address_space *mapping = (struct address_space *)(gl + 1);
gfs2_init_glock_once(gl);
- memset(mapping, 0, sizeof(*mapping));
- INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
- spin_lock_init(&mapping->tree_lock);
- spin_lock_init(&mapping->i_mmap_lock);
- INIT_LIST_HEAD(&mapping->private_list);
- spin_lock_init(&mapping->private_lock);
- INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
- INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+ address_space_init_once(mapping);
}
/**
@@ -198,6 +193,8 @@ static void __exit exit_gfs2_fs(void)
unregister_filesystem(&gfs2meta_fs_type);
destroy_workqueue(gfs_recovery_wq);
+ rcu_barrier();
+
kmem_cache_destroy(gfs2_quotad_cachep);
kmem_cache_destroy(gfs2_rgrpd_cachep);
kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 939739c7b3f..01d97f48655 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -326,6 +326,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
brelse(bh);
}
if (bd) {
+ spin_lock(&sdp->sd_ail_lock);
if (bd->bd_ail) {
gfs2_remove_from_ail(bd);
bh->b_private = NULL;
@@ -333,6 +334,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
bd->bd_blkno = bh->b_blocknr;
gfs2_trans_add_revoke(sdp, bd);
}
+ spin_unlock(&sdp->sd_ail_lock);
}
clear_buffer_dirty(bh);
clear_buffer_uptodate(bh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 777927ce6f7..42ef24355af 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -99,6 +99,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
init_waitqueue_head(&sdp->sd_log_waitq);
init_waitqueue_head(&sdp->sd_logd_waitq);
+ spin_lock_init(&sdp->sd_ail_lock);
INIT_LIST_HEAD(&sdp->sd_ail1_list);
INIT_LIST_HEAD(&sdp->sd_ail2_list);
@@ -928,17 +929,9 @@ static const match_table_t nolock_tokens = {
{ Opt_err, NULL },
};
-static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
-{
- struct gfs2_sbd *sdp = gl->gl_sbd;
- kmem_cache_free(cachep, gl);
- if (atomic_dec_and_test(&sdp->sd_glock_disposal))
- wake_up(&sdp->sd_glock_wait);
-}
-
static const struct lm_lockops nolock_ops = {
.lm_proto_name = "lock_nolock",
- .lm_put_lock = nolock_put_lock,
+ .lm_put_lock = gfs2_glock_free,
.lm_tokens = &nolock_tokens,
};
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d8b26ac2e20..09e436a5072 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1026,9 +1026,9 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
/**
* gfs2_permission -
- * @inode:
- * @mask:
- * @nd: passed from Linux VFS, ignored by us
+ * @inode: The inode
+ * @mask: The mask to be tested
+ * @flags: Indicates whether this is an RCU path walk or not
*
* This may be called from the VFS directly, or from within GFS2 with the
* inode locked, so we look to see if the glock is already locked and only
@@ -1044,11 +1044,11 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
int error;
int unlock = 0;
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
ip = GFS2_I(inode);
if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
return error;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a689901963d..e23d9864c41 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -834,6 +834,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
goto out_end_trans;
do_qc(qd, -qd->qd_change_sync);
+ set_bit(QDF_REFRESH, &qd->qd_flags);
}
error = 0;
@@ -929,6 +930,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_alloc *al = ip->i_alloc;
+ struct gfs2_quota_data *qd;
unsigned int x;
int error = 0;
@@ -942,7 +944,11 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
sort_qd, NULL);
for (x = 0; x < al->al_qd_num; x++) {
- error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]);
+ int force = NO_FORCE;
+ qd = al->al_qd[x];
+ if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
+ force = FORCE;
+ error = do_glock(qd, force, &al->al_qd_ghs[x]);
if (error)
break;
}
@@ -1587,6 +1593,8 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
offset = qd2offset(qd);
alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota));
+ if (gfs2_is_stuffed(ip))
+ alloc_required = 1;
if (alloc_required) {
al = gfs2_alloc_get(ip);
if (al == NULL)
@@ -1600,7 +1608,9 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
blocks += gfs2_rg_blocks(al);
}
- error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0);
+ /* Some quotas span block boundaries and can update two blocks,
+ adding an extra block to the transaction to handle such quotas */
+ error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 2, 0);
if (error)
goto out_release;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7293ea27020..cf930cd9664 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1602,7 +1602,7 @@ rgrp_error:
*
*/
-void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
+void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd;
@@ -1617,7 +1617,21 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
gfs2_trans_add_rg(rgd);
+}
+/**
+ * gfs2_free_data - free a contiguous run of data block(s)
+ * @ip: the inode these blocks are being freed from
+ * @bstart: first block of a run of contiguous blocks
+ * @blen: the length of the block run
+ *
+ */
+
+void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+
+ __gfs2_free_data(ip, bstart, blen);
gfs2_statfs_change(sdp, 0, +blen, 0);
gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
}
@@ -1630,7 +1644,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
*
*/
-void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
+void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd;
@@ -1645,10 +1659,24 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
gfs2_trans_add_rg(rgd);
+ gfs2_meta_wipe(ip, bstart, blen);
+}
+/**
+ * gfs2_free_meta - free a contiguous run of data block(s)
+ * @ip: the inode these blocks are being freed from
+ * @bstart: first block of a run of contiguous blocks
+ * @blen: the length of the block run
+ *
+ */
+
+void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+
+ __gfs2_free_meta(ip, bstart, blen);
gfs2_statfs_change(sdp, 0, +blen, 0);
gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
- gfs2_meta_wipe(ip, bstart, blen);
}
void gfs2_unlink_di(struct inode *inode)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 50c2bb04369..a80e3034ac4 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -52,7 +52,9 @@ extern int gfs2_ri_update(struct gfs2_inode *ip);
extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
+extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
extern void gfs2_unlink_di(struct inode *inode);
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index afa66aaa223..b4d70b13be9 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -238,46 +238,22 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
}
/*
- * hfs_unlink()
+ * hfs_remove()
*
- * This is the unlink() entry in the inode_operations structure for
- * regular HFS directories. The purpose is to delete an existing
- * file, given the inode for the parent directory and the name
- * (and its length) of the existing file.
- */
-static int hfs_unlink(struct inode *dir, struct dentry *dentry)
-{
- struct inode *inode;
- int res;
-
- inode = dentry->d_inode;
- res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
- if (res)
- return res;
-
- drop_nlink(inode);
- hfs_delete_inode(inode);
- inode->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(inode);
-
- return res;
-}
-
-/*
- * hfs_rmdir()
+ * This serves as both unlink() and rmdir() in the inode_operations
+ * structure for regular HFS directories. The purpose is to delete
+ * an existing child, given the inode for the parent directory and
+ * the name (and its length) of the existing directory.
*
- * This is the rmdir() entry in the inode_operations structure for
- * regular HFS directories. The purpose is to delete an existing
- * directory, given the inode for the parent directory and the name
- * (and its length) of the existing directory.
+ * HFS does not have hardlinks, so both rmdir and unlink set the
+ * link count to 0. The only difference is the emptiness check.
*/
-static int hfs_rmdir(struct inode *dir, struct dentry *dentry)
+static int hfs_remove(struct inode *dir, struct dentry *dentry)
{
- struct inode *inode;
+ struct inode *inode = dentry->d_inode;
int res;
- inode = dentry->d_inode;
- if (inode->i_size != 2)
+ if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
return -ENOTEMPTY;
res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
if (res)
@@ -307,7 +283,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* Unlink destination if it already exists */
if (new_dentry->d_inode) {
- res = hfs_unlink(new_dir, new_dentry);
+ res = hfs_remove(new_dir, new_dentry);
if (res)
return res;
}
@@ -332,9 +308,9 @@ const struct file_operations hfs_dir_operations = {
const struct inode_operations hfs_dir_inode_operations = {
.create = hfs_create,
.lookup = hfs_lookup,
- .unlink = hfs_unlink,
+ .unlink = hfs_remove,
.mkdir = hfs_mkdir,
- .rmdir = hfs_rmdir,
+ .rmdir = hfs_remove,
.rename = hfs_rename,
.setattr = hfs_inode_setattr,
};
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f..9910c039f02 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -84,16 +84,13 @@ static struct hlist_head *inode_hashtable __read_mostly;
DEFINE_SPINLOCK(inode_lock);
/*
- * iprune_sem provides exclusion between the kswapd or try_to_free_pages
- * icache shrinking path, and the umount path. Without this exclusion,
- * by the time prune_icache calls iput for the inode whose pages it has
- * been invalidating, or by the time it calls clear_inode & destroy_inode
- * from its final dispose_list, the struct super_block they refer to
- * (for inode->i_sb->s_op) may already have been freed and reused.
+ * iprune_sem provides exclusion between the icache shrinking and the
+ * umount path.
*
- * We make this an rwsem because the fastpath is icache shrinking. In
- * some cases a filesystem may be doing a significant amount of work in
- * its inode reclaim code, so this should improve parallelism.
+ * We don't actually need it to protect anything in the umount path,
+ * but only need to cycle through it to make sure any inode that
+ * prune_icache took off the LRU list has been fully torn down by the
+ * time we are past evict_inodes.
*/
static DECLARE_RWSEM(iprune_sem);
@@ -295,6 +292,20 @@ static void destroy_inode(struct inode *inode)
call_rcu(&inode->i_rcu, i_callback);
}
+void address_space_init_once(struct address_space *mapping)
+{
+ memset(mapping, 0, sizeof(*mapping));
+ INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
+ spin_lock_init(&mapping->tree_lock);
+ spin_lock_init(&mapping->i_mmap_lock);
+ INIT_LIST_HEAD(&mapping->private_list);
+ spin_lock_init(&mapping->private_lock);
+ INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
+ INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+ mutex_init(&mapping->unmap_mutex);
+}
+EXPORT_SYMBOL(address_space_init_once);
+
/*
* These are initializations that only need to be done
* once, because the fields are idempotent across use
@@ -308,13 +319,7 @@ void inode_init_once(struct inode *inode)
INIT_LIST_HEAD(&inode->i_devices);
INIT_LIST_HEAD(&inode->i_wb_list);
INIT_LIST_HEAD(&inode->i_lru);
- INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
- spin_lock_init(&inode->i_data.tree_lock);
- spin_lock_init(&inode->i_data.i_mmap_lock);
- INIT_LIST_HEAD(&inode->i_data.private_list);
- spin_lock_init(&inode->i_data.private_lock);
- INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
- INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
+ address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -508,17 +513,12 @@ void evict_inodes(struct super_block *sb)
struct inode *inode, *next;
LIST_HEAD(dispose);
- down_write(&iprune_sem);
-
spin_lock(&inode_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
if (atomic_read(&inode->i_count))
continue;
-
- if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
- WARN_ON(1);
+ if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
continue;
- }
inode->i_state |= I_FREEING;
@@ -534,28 +534,40 @@ void evict_inodes(struct super_block *sb)
spin_unlock(&inode_lock);
dispose_list(&dispose);
+
+ /*
+ * Cycle through iprune_sem to make sure any inode that prune_icache
+ * moved off the list before we took the lock has been fully torn
+ * down.
+ */
+ down_write(&iprune_sem);
up_write(&iprune_sem);
}
/**
* invalidate_inodes - attempt to free all inodes on a superblock
* @sb: superblock to operate on
+ * @kill_dirty: flag to guide handling of dirty inodes
*
* Attempts to free all inodes for a given superblock. If there were any
* busy inodes return a non-zero value, else zero.
+ * If @kill_dirty is set, discard dirty inodes too, otherwise treat
+ * them as busy.
*/
-int invalidate_inodes(struct super_block *sb)
+int invalidate_inodes(struct super_block *sb, bool kill_dirty)
{
int busy = 0;
struct inode *inode, *next;
LIST_HEAD(dispose);
- down_write(&iprune_sem);
-
spin_lock(&inode_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
continue;
+ if (inode->i_state & I_DIRTY && !kill_dirty) {
+ busy = 1;
+ continue;
+ }
if (atomic_read(&inode->i_count)) {
busy = 1;
continue;
@@ -575,7 +587,6 @@ int invalidate_inodes(struct super_block *sb)
spin_unlock(&inode_lock);
dispose_list(&dispose);
- up_write(&iprune_sem);
return busy;
}
diff --git a/fs/internal.h b/fs/internal.h
index 0663568b124..f3d15de44b1 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,10 +106,23 @@ extern void put_super(struct super_block *sb);
struct nameidata;
extern struct file *nameidata_to_filp(struct nameidata *);
extern void release_open_intent(struct nameidata *);
+struct open_flags {
+ int open_flag;
+ int mode;
+ int acc_mode;
+ int intent;
+};
+extern struct file *do_filp_open(int dfd, const char *pathname,
+ const struct open_flags *op, int lookup_flags);
+extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
+ const char *, const struct open_flags *, int lookup_flags);
+
+extern long do_handle_open(int mountdirfd,
+ struct file_handle __user *ufh, int open_flag);
/*
* inode.c
*/
extern int get_nr_dirty_inodes(void);
extern void evict_inodes(struct super_block *);
-extern int invalidate_inodes(struct super_block *);
+extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index ed752cb3847..dd4687ff30d 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry,
* offset of the inode and the upper 16 bits of fh32[1] to
* hold the offset of the parent.
*/
-
- if (len < 3 || (connectable && len < 5))
+ if (connectable && (len < 5)) {
+ *max_len = 5;
+ return 255;
+ } else if (len < 3) {
+ *max_len = 3;
return 255;
+ }
len = 3;
fh32[0] = ei->i_iget5_block;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 92978658ed1..82faddd1f32 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -215,8 +215,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
no chance of AB-BA deadlock involving its f->sem). */
mutex_unlock(&f->sem);
- ret = jffs2_do_create(c, dir_f, f, ri,
- dentry->d_name.name, dentry->d_name.len);
+ ret = jffs2_do_create(c, dir_f, f, ri, &dentry->d_name);
if (ret)
goto fail;
@@ -386,7 +385,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
- ret = jffs2_init_security(inode, dir_i);
+ ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
if (ret)
goto fail;
@@ -530,7 +529,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
- ret = jffs2_init_security(inode, dir_i);
+ ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
if (ret)
goto fail;
@@ -703,7 +702,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
- ret = jffs2_init_security(inode, dir_i);
+ ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
if (ret)
goto fail;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 5a53d9bdb2b..e4619b00f7c 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -401,7 +401,7 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
struct jffs2_raw_inode *ri, unsigned char *buf,
uint32_t offset, uint32_t writelen, uint32_t *retlen);
int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f,
- struct jffs2_raw_inode *ri, const char *name, int namelen);
+ struct jffs2_raw_inode *ri, const struct qstr *qstr);
int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, const char *name,
int namelen, struct jffs2_inode_info *dead_f, uint32_t time);
int jffs2_do_link(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint32_t ino,
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index 239f51216a6..cfeb7164b08 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -23,14 +23,15 @@
#include "nodelist.h"
/* ---- Initial Security Label Attachment -------------- */
-int jffs2_init_security(struct inode *inode, struct inode *dir)
+int jffs2_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int rc;
size_t len;
void *value;
char *name;
- rc = security_inode_init_security(inode, dir, &name, &value, &len);
+ rc = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
if (rc) {
if (rc == -EOPNOTSUPP)
return 0;
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index c819eb0e982..30d175b6d29 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -424,7 +424,9 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
return ret;
}
-int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f, struct jffs2_raw_inode *ri, const char *name, int namelen)
+int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
+ struct jffs2_inode_info *f, struct jffs2_raw_inode *ri,
+ const struct qstr *qstr)
{
struct jffs2_raw_dirent *rd;
struct jffs2_full_dnode *fn;
@@ -466,15 +468,15 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- ret = jffs2_init_security(&f->vfs_inode, &dir_f->vfs_inode);
+ ret = jffs2_init_security(&f->vfs_inode, &dir_f->vfs_inode, qstr);
if (ret)
return ret;
ret = jffs2_init_acl_post(&f->vfs_inode);
if (ret)
return ret;
- ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
- ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
+ ret = jffs2_reserve_space(c, sizeof(*rd)+qstr->len, &alloclen,
+ ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(qstr->len));
if (ret) {
/* Eep. */
@@ -493,19 +495,19 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
- rd->totlen = cpu_to_je32(sizeof(*rd) + namelen);
+ rd->totlen = cpu_to_je32(sizeof(*rd) + qstr->len);
rd->hdr_crc = cpu_to_je32(crc32(0, rd, sizeof(struct jffs2_unknown_node)-4));
rd->pino = cpu_to_je32(dir_f->inocache->ino);
rd->version = cpu_to_je32(++dir_f->highest_version);
rd->ino = ri->ino;
rd->mctime = ri->ctime;
- rd->nsize = namelen;
+ rd->nsize = qstr->len;
rd->type = DT_REG;
rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
- rd->name_crc = cpu_to_je32(crc32(0, name, namelen));
+ rd->name_crc = cpu_to_je32(crc32(0, qstr->name, qstr->len));
- fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, ALLOC_NORMAL);
+ fd = jffs2_write_dirent(c, dir_f, rd, qstr->name, qstr->len, ALLOC_NORMAL);
jffs2_free_raw_dirent(rd);
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h
index cf4f5759b42..7be4beb306f 100644
--- a/fs/jffs2/xattr.h
+++ b/fs/jffs2/xattr.h
@@ -121,10 +121,11 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t);
#endif /* CONFIG_JFFS2_FS_XATTR */
#ifdef CONFIG_JFFS2_FS_SECURITY
-extern int jffs2_init_security(struct inode *inode, struct inode *dir);
+extern int jffs2_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr);
extern const struct xattr_handler jffs2_security_xattr_handler;
#else
-#define jffs2_init_security(inode,dir) (0)
+#define jffs2_init_security(inode,dir,qstr) (0)
#endif /* CONFIG_JFFS2_FS_SECURITY */
#endif /* _JFFS2_FS_XATTR_H_ */
diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h
index 88b6cc535bf..e9e100fd7c0 100644
--- a/fs/jfs/jfs_xattr.h
+++ b/fs/jfs/jfs_xattr.h
@@ -62,10 +62,11 @@ extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
extern int jfs_removexattr(struct dentry *, const char *);
#ifdef CONFIG_JFS_SECURITY
-extern int jfs_init_security(tid_t, struct inode *, struct inode *);
+extern int jfs_init_security(tid_t, struct inode *, struct inode *,
+ const struct qstr *);
#else
static inline int jfs_init_security(tid_t tid, struct inode *inode,
- struct inode *dir)
+ struct inode *dir, const struct qstr *qstr)
{
return 0;
}
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 81ead850ddb..eaaf2b511e8 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -115,7 +115,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
if (rc)
goto out3;
- rc = jfs_init_security(tid, ip, dip);
+ rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
if (rc) {
txAbort(tid, 0);
goto out3;
@@ -253,7 +253,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
if (rc)
goto out3;
- rc = jfs_init_security(tid, ip, dip);
+ rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
if (rc) {
txAbort(tid, 0);
goto out3;
@@ -809,9 +809,6 @@ static int jfs_link(struct dentry *old_dentry,
if (ip->i_nlink == JFS_LINK_MAX)
return -EMLINK;
- if (ip->i_nlink == 0)
- return -ENOENT;
-
dquot_initialize(dir);
tid = txBegin(ip->i_sb, 0);
@@ -932,7 +929,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
- rc = jfs_init_security(tid, ip, dip);
+ rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
if (rc)
goto out3;
@@ -1395,7 +1392,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
if (rc)
goto out3;
- rc = jfs_init_security(tid, ip, dir);
+ rc = jfs_init_security(tid, ip, dir, &dentry->d_name);
if (rc) {
txAbort(tid, 0);
goto out3;
@@ -1600,7 +1597,7 @@ out:
static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/*
* This is not negative dentry. Always valid.
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 2d7f165d0f1..3fa4c32272d 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -1091,7 +1091,8 @@ int jfs_removexattr(struct dentry *dentry, const char *name)
}
#ifdef CONFIG_JFS_SECURITY
-int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir)
+int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int rc;
size_t len;
@@ -1099,7 +1100,8 @@ int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir)
char *suffix;
char *name;
- rc = security_inode_init_security(inode, dir, &suffix, &value, &len);
+ rc = security_inode_init_security(inode, dir, qstr, &suffix, &value,
+ &len);
if (rc) {
if (rc == -EOPNOTSUPP)
return 0;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ce7337ddfdb..6e6777f1b4b 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,7 +213,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
new_de = minix_find_entry(new_dentry, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
minix_set_link(new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -225,18 +224,15 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= info->s_link_max)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = minix_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
minix_delete_entry(old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
minix_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/namei.c b/fs/namei.c
index 0087cf9c2c6..b912b7abe74 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
return retval;
}
-char * getname(const char __user * filename)
+static char *getname_flags(const char __user * filename, int flags)
{
char *tmp, *result;
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
result = tmp;
if (retval < 0) {
- __putname(tmp);
- result = ERR_PTR(retval);
+ if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
+ __putname(tmp);
+ result = ERR_PTR(retval);
+ }
}
}
audit_getname(result);
return result;
}
+char *getname(const char __user * filename)
+{
+ return getname_flags(filename, 0);
+}
+
#ifdef CONFIG_AUDITSYSCALL
void putname(const char *name)
{
@@ -401,9 +408,11 @@ static int nameidata_drop_rcu(struct nameidata *nd)
{
struct fs_struct *fs = current->fs;
struct dentry *dentry = nd->path.dentry;
+ int want_root = 0;
BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (nd->root.mnt) {
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ want_root = 1;
spin_lock(&fs->lock);
if (nd->root.mnt != fs->root.mnt ||
nd->root.dentry != fs->root.dentry)
@@ -414,7 +423,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
goto err;
BUG_ON(nd->inode != dentry->d_inode);
spin_unlock(&dentry->d_lock);
- if (nd->root.mnt) {
+ if (want_root) {
path_get(&nd->root);
spin_unlock(&fs->lock);
}
@@ -427,7 +436,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
err:
spin_unlock(&dentry->d_lock);
err_root:
- if (nd->root.mnt)
+ if (want_root)
spin_unlock(&fs->lock);
return -ECHILD;
}
@@ -454,9 +463,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
{
struct fs_struct *fs = current->fs;
struct dentry *parent = nd->path.dentry;
+ int want_root = 0;
BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (nd->root.mnt) {
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ want_root = 1;
spin_lock(&fs->lock);
if (nd->root.mnt != fs->root.mnt ||
nd->root.dentry != fs->root.dentry)
@@ -476,7 +487,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
parent->d_count++;
spin_unlock(&dentry->d_lock);
spin_unlock(&parent->d_lock);
- if (nd->root.mnt) {
+ if (want_root) {
path_get(&nd->root);
spin_unlock(&fs->lock);
}
@@ -490,7 +501,7 @@ err:
spin_unlock(&dentry->d_lock);
spin_unlock(&parent->d_lock);
err_root:
- if (nd->root.mnt)
+ if (want_root)
spin_unlock(&fs->lock);
return -ECHILD;
}
@@ -498,8 +509,16 @@ err_root:
/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
{
- if (nd->flags & LOOKUP_RCU)
- return nameidata_dentry_drop_rcu(nd, dentry);
+ if (nd->flags & LOOKUP_RCU) {
+ if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
+ nd->flags &= ~LOOKUP_RCU;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ return -ECHILD;
+ }
+ }
return 0;
}
@@ -518,7 +537,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd)
BUG_ON(!(nd->flags & LOOKUP_RCU));
nd->flags &= ~LOOKUP_RCU;
- nd->root.mnt = NULL;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
spin_lock(&dentry->d_lock);
if (!__d_rcu_to_refcount(dentry, nd->seq))
goto err_unlock;
@@ -539,14 +559,6 @@ err_unlock:
return -ECHILD;
}
-/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
-static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
-{
- if (likely(nd->flags & LOOKUP_RCU))
- return nameidata_drop_rcu_last(nd);
- return 0;
-}
-
/**
* release_open_intent - free up open intent resources
* @nd: pointer to nameidata
@@ -590,42 +602,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
return dentry;
}
-static inline struct dentry *
-do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
-{
- int status = d_revalidate(dentry, nd);
- if (likely(status > 0))
- return dentry;
- if (status == -ECHILD) {
- if (nameidata_dentry_drop_rcu(nd, dentry))
- return ERR_PTR(-ECHILD);
- return do_revalidate(dentry, nd);
- }
- if (status < 0)
- return ERR_PTR(status);
- /* Don't d_invalidate in rcu-walk mode */
- if (nameidata_dentry_drop_rcu(nd, dentry))
- return ERR_PTR(-ECHILD);
- if (!d_invalidate(dentry)) {
- dput(dentry);
- dentry = NULL;
- }
- return dentry;
-}
-
-static inline int need_reval_dot(struct dentry *dentry)
-{
- if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
- return 0;
-
- if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
- return 0;
-
- return 1;
-}
-
/*
- * force_reval_path - force revalidation of a dentry
+ * handle_reval_path - force revalidation of a dentry
*
* In some situations the path walking code will trust dentries without
* revalidating them. This causes problems for filesystems that depend on
@@ -639,27 +617,28 @@ static inline int need_reval_dot(struct dentry *dentry)
* invalidate the dentry. It's up to the caller to handle putting references
* to the path if necessary.
*/
-static int
-force_reval_path(struct path *path, struct nameidata *nd)
+static inline int handle_reval_path(struct nameidata *nd)
{
+ struct dentry *dentry = nd->path.dentry;
int status;
- struct dentry *dentry = path->dentry;
- /*
- * only check on filesystems where it's possible for the dentry to
- * become stale.
- */
- if (!need_reval_dot(dentry))
+ if (likely(!(nd->flags & LOOKUP_JUMPED)))
+ return 0;
+
+ if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
+ return 0;
+
+ if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
return 0;
+ /* Note: we do not d_invalidate() */
status = d_revalidate(dentry, nd);
if (status > 0)
return 0;
- if (!status) {
- d_invalidate(dentry);
+ if (!status)
status = -ESTALE;
- }
+
return status;
}
@@ -728,6 +707,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
path_put(&nd->path);
nd->path = nd->root;
path_get(&nd->root);
+ nd->flags |= LOOKUP_JUMPED;
}
nd->inode = nd->path.dentry->d_inode;
@@ -757,19 +737,42 @@ static inline void path_to_nameidata(const struct path *path,
nd->path.dentry = path->dentry;
}
+static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
+{
+ struct inode *inode = link->dentry->d_inode;
+ if (!IS_ERR(cookie) && inode->i_op->put_link)
+ inode->i_op->put_link(link->dentry, nd, cookie);
+ path_put(link);
+}
+
static __always_inline int
-__do_follow_link(const struct path *link, struct nameidata *nd, void **p)
+follow_link(struct path *link, struct nameidata *nd, void **p)
{
int error;
struct dentry *dentry = link->dentry;
BUG_ON(nd->flags & LOOKUP_RCU);
+ if (link->mnt == nd->path.mnt)
+ mntget(link->mnt);
+
+ if (unlikely(current->total_link_count >= 40)) {
+ *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
+ path_put(&nd->path);
+ return -ELOOP;
+ }
+ cond_resched();
+ current->total_link_count++;
+
touch_atime(link->mnt, dentry);
nd_set_link(nd, NULL);
- if (link->mnt == nd->path.mnt)
- mntget(link->mnt);
+ error = security_inode_follow_link(link->dentry, nd);
+ if (error) {
+ *p = ERR_PTR(error); /* no ->put_link(), please */
+ path_put(&nd->path);
+ return error;
+ }
nd->last_type = LAST_BIND;
*p = dentry->d_inode->i_op->follow_link(dentry, nd);
@@ -780,56 +783,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
if (s)
error = __vfs_follow_link(nd, s);
else if (nd->last_type == LAST_BIND) {
- error = force_reval_path(&nd->path, nd);
- if (error)
+ nd->flags |= LOOKUP_JUMPED;
+ nd->inode = nd->path.dentry->d_inode;
+ if (nd->inode->i_op->follow_link) {
+ /* stepped on a _really_ weird one */
path_put(&nd->path);
+ error = -ELOOP;
+ }
}
}
return error;
}
-/*
- * This limits recursive symlink follows to 8, while
- * limiting consecutive symlinks to 40.
- *
- * Without that kind of total limit, nasty chains of consecutive
- * symlinks can cause almost arbitrarily long lookups.
- */
-static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
-{
- void *cookie;
- int err = -ELOOP;
-
- /* We drop rcu-walk here */
- if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
- return -ECHILD;
- BUG_ON(inode != path->dentry->d_inode);
-
- if (current->link_count >= MAX_NESTED_LINKS)
- goto loop;
- if (current->total_link_count >= 40)
- goto loop;
- BUG_ON(nd->depth >= MAX_NESTED_LINKS);
- cond_resched();
- err = security_inode_follow_link(path->dentry, nd);
- if (err)
- goto loop;
- current->link_count++;
- current->total_link_count++;
- nd->depth++;
- err = __do_follow_link(path, nd, &cookie);
- if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
- path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
- path_put(path);
- current->link_count--;
- nd->depth--;
- return err;
-loop:
- path_put_conditional(path, nd);
- path_put(&nd->path);
- return err;
-}
-
static int follow_up_rcu(struct path *path)
{
struct vfsmount *parent;
@@ -1068,7 +1033,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
seq = read_seqcount_begin(&parent->d_seq);
if (read_seqcount_retry(&old->d_seq, nd->seq))
- return -ECHILD;
+ goto failed;
inode = parent->d_inode;
nd->path.dentry = parent;
nd->seq = seq;
@@ -1081,8 +1046,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
}
__follow_mount_rcu(nd, &nd->path, &inode, true);
nd->inode = inode;
-
return 0;
+
+failed:
+ nd->flags &= ~LOOKUP_RCU;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ return -ECHILD;
}
/*
@@ -1216,68 +1188,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
{
struct vfsmount *mnt = nd->path.mnt;
struct dentry *dentry, *parent = nd->path.dentry;
- struct inode *dir;
+ int need_reval = 1;
+ int status = 1;
int err;
/*
- * See if the low-level filesystem might want
- * to use its own hash..
- */
- if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
- err = parent->d_op->d_hash(parent, nd->inode, name);
- if (err < 0)
- return err;
- }
-
- /*
* Rename seqlock is not required here because in the off chance
* of a false negative due to a concurrent rename, we're going to
* do the non-racy lookup, below.
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
-
*inode = nd->inode;
dentry = __d_lookup_rcu(parent, name, &seq, inode);
- if (!dentry) {
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- goto need_lookup;
- }
+ if (!dentry)
+ goto unlazy;
+
/* Memory barrier in read_seqcount_begin of child is enough */
if (__read_seqcount_retry(&parent->d_seq, nd->seq))
return -ECHILD;
-
nd->seq = seq;
+
if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
- dentry = do_revalidate_rcu(dentry, nd);
- if (!dentry)
- goto need_lookup;
- if (IS_ERR(dentry))
- goto fail;
- if (!(nd->flags & LOOKUP_RCU))
- goto done;
+ status = d_revalidate(dentry, nd);
+ if (unlikely(status <= 0)) {
+ if (status != -ECHILD)
+ need_reval = 0;
+ goto unlazy;
+ }
}
path->mnt = mnt;
path->dentry = dentry;
if (likely(__follow_mount_rcu(nd, path, inode, false)))
return 0;
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- /* fallthru */
+unlazy:
+ if (dentry) {
+ if (nameidata_dentry_drop_rcu(nd, dentry))
+ return -ECHILD;
+ } else {
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ }
+ } else {
+ dentry = __d_lookup(parent, name);
}
- dentry = __d_lookup(parent, name);
- if (!dentry)
- goto need_lookup;
-found:
- if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
- dentry = do_revalidate(dentry, nd);
- if (!dentry)
- goto need_lookup;
- if (IS_ERR(dentry))
- goto fail;
+
+retry:
+ if (unlikely(!dentry)) {
+ struct inode *dir = parent->d_inode;
+ BUG_ON(nd->inode != dir);
+
+ mutex_lock(&dir->i_mutex);
+ dentry = d_lookup(parent, name);
+ if (likely(!dentry)) {
+ dentry = d_alloc_and_lookup(parent, name, nd);
+ if (IS_ERR(dentry)) {
+ mutex_unlock(&dir->i_mutex);
+ return PTR_ERR(dentry);
+ }
+ /* known good */
+ need_reval = 0;
+ status = 1;
+ }
+ mutex_unlock(&dir->i_mutex);
}
-done:
+ if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
+ status = d_revalidate(dentry, nd);
+ if (unlikely(status <= 0)) {
+ if (status < 0) {
+ dput(dentry);
+ return status;
+ }
+ if (!d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
+ need_reval = 1;
+ goto retry;
+ }
+ }
+
path->mnt = mnt;
path->dentry = dentry;
err = follow_managed(path, nd->flags);
@@ -1287,39 +1276,113 @@ done:
}
*inode = path->dentry->d_inode;
return 0;
+}
+
+static inline int may_lookup(struct nameidata *nd)
+{
+ if (nd->flags & LOOKUP_RCU) {
+ int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
+ if (err != -ECHILD)
+ return err;
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ }
+ return exec_permission(nd->inode, 0);
+}
-need_lookup:
- dir = parent->d_inode;
- BUG_ON(nd->inode != dir);
+static inline int handle_dots(struct nameidata *nd, int type)
+{
+ if (type == LAST_DOTDOT) {
+ if (nd->flags & LOOKUP_RCU) {
+ if (follow_dotdot_rcu(nd))
+ return -ECHILD;
+ } else
+ follow_dotdot(nd);
+ }
+ return 0;
+}
- mutex_lock(&dir->i_mutex);
- /*
- * First re-do the cached lookup just in case it was created
- * while we waited for the directory semaphore, or the first
- * lookup failed due to an unrelated rename.
- *
- * This could use version numbering or similar to avoid unnecessary
- * cache lookups, but then we'd have to do the first lookup in the
- * non-racy way. However in the common case here, everything should
- * be hot in cache, so would it be a big win?
- */
- dentry = d_lookup(parent, name);
- if (likely(!dentry)) {
- dentry = d_alloc_and_lookup(parent, name, nd);
- mutex_unlock(&dir->i_mutex);
- if (IS_ERR(dentry))
- goto fail;
- goto done;
+static void terminate_walk(struct nameidata *nd)
+{
+ if (!(nd->flags & LOOKUP_RCU)) {
+ path_put(&nd->path);
+ } else {
+ nd->flags &= ~LOOKUP_RCU;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
}
+}
+
+static inline int walk_component(struct nameidata *nd, struct path *path,
+ struct qstr *name, int type, int follow)
+{
+ struct inode *inode;
+ int err;
/*
- * Uhhuh! Nasty case: the cache was re-populated while
- * we waited on the semaphore. Need to revalidate.
+ * "." and ".." are special - ".." especially so because it has
+ * to be able to know about the current root directory and
+ * parent relationships.
*/
- mutex_unlock(&dir->i_mutex);
- goto found;
+ if (unlikely(type != LAST_NORM))
+ return handle_dots(nd, type);
+ err = do_lookup(nd, name, path, &inode);
+ if (unlikely(err)) {
+ terminate_walk(nd);
+ return err;
+ }
+ if (!inode) {
+ path_to_nameidata(path, nd);
+ terminate_walk(nd);
+ return -ENOENT;
+ }
+ if (unlikely(inode->i_op->follow_link) && follow) {
+ if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
+ return -ECHILD;
+ BUG_ON(inode != path->dentry->d_inode);
+ return 1;
+ }
+ path_to_nameidata(path, nd);
+ nd->inode = inode;
+ return 0;
+}
-fail:
- return PTR_ERR(dentry);
+/*
+ * This limits recursive symlink follows to 8, while
+ * limiting consecutive symlinks to 40.
+ *
+ * Without that kind of total limit, nasty chains of consecutive
+ * symlinks can cause almost arbitrarily long lookups.
+ */
+static inline int nested_symlink(struct path *path, struct nameidata *nd)
+{
+ int res;
+
+ BUG_ON(nd->depth >= MAX_NESTED_LINKS);
+ if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
+ path_put_conditional(path, nd);
+ path_put(&nd->path);
+ return -ELOOP;
+ }
+
+ nd->depth++;
+ current->link_count++;
+
+ do {
+ struct path link = *path;
+ void *cookie;
+
+ res = follow_link(&link, nd, &cookie);
+ if (!res)
+ res = walk_component(nd, path, &nd->last,
+ nd->last_type, LOOKUP_FOLLOW);
+ put_link(nd, &link, cookie);
+ } while (res > 0);
+
+ current->link_count--;
+ nd->depth--;
+ return res;
}
/*
@@ -1339,30 +1402,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
while (*name=='/')
name++;
if (!*name)
- goto return_reval;
-
- if (nd->depth)
- lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
+ return 0;
/* At this point we know we have a real path component. */
for(;;) {
- struct inode *inode;
unsigned long hash;
struct qstr this;
unsigned int c;
+ int type;
nd->flags |= LOOKUP_CONTINUE;
- if (nd->flags & LOOKUP_RCU) {
- err = exec_permission(nd->inode, IPERM_FLAG_RCU);
- if (err == -ECHILD) {
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- goto exec_again;
- }
- } else {
-exec_again:
- err = exec_permission(nd->inode, 0);
- }
+
+ err = may_lookup(nd);
if (err)
break;
@@ -1378,52 +1429,43 @@ exec_again:
this.len = name - (const char *) this.name;
this.hash = end_name_hash(hash);
+ type = LAST_NORM;
+ if (this.name[0] == '.') switch (this.len) {
+ case 2:
+ if (this.name[1] == '.') {
+ type = LAST_DOTDOT;
+ nd->flags |= LOOKUP_JUMPED;
+ }
+ break;
+ case 1:
+ type = LAST_DOT;
+ }
+ if (likely(type == LAST_NORM)) {
+ struct dentry *parent = nd->path.dentry;
+ nd->flags &= ~LOOKUP_JUMPED;
+ if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
+ err = parent->d_op->d_hash(parent, nd->inode,
+ &this);
+ if (err < 0)
+ break;
+ }
+ }
+
/* remove trailing slashes? */
if (!c)
goto last_component;
while (*++name == '/');
if (!*name)
- goto last_with_slashes;
+ goto last_component;
- /*
- * "." and ".." are special - ".." especially so because it has
- * to be able to know about the current root directory and
- * parent relationships.
- */
- if (this.name[0] == '.') switch (this.len) {
- default:
- break;
- case 2:
- if (this.name[1] != '.')
- break;
- if (nd->flags & LOOKUP_RCU) {
- if (follow_dotdot_rcu(nd))
- return -ECHILD;
- } else
- follow_dotdot(nd);
- /* fallthrough */
- case 1:
- continue;
- }
- /* This does the actual lookups.. */
- err = do_lookup(nd, &this, &next, &inode);
- if (err)
- break;
- err = -ENOENT;
- if (!inode)
- goto out_dput;
+ err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
+ if (err < 0)
+ return err;
- if (inode->i_op->follow_link) {
- err = do_follow_link(inode, &next, nd);
+ if (err) {
+ err = nested_symlink(&next, nd);
if (err)
- goto return_err;
- nd->inode = nd->path.dentry->d_inode;
- err = -ENOENT;
- if (!nd->inode)
- break;
- } else {
- path_to_nameidata(&next, nd);
- nd->inode = inode;
+ return err;
}
err = -ENOTDIR;
if (!nd->inode->i_op->lookup)
@@ -1431,209 +1473,109 @@ exec_again:
continue;
/* here ends the main loop */
-last_with_slashes:
- lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
/* Clear LOOKUP_CONTINUE iff it was previously unset */
nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
- if (lookup_flags & LOOKUP_PARENT)
- goto lookup_parent;
- if (this.name[0] == '.') switch (this.len) {
- default:
- break;
- case 2:
- if (this.name[1] != '.')
- break;
- if (nd->flags & LOOKUP_RCU) {
- if (follow_dotdot_rcu(nd))
- return -ECHILD;
- } else
- follow_dotdot(nd);
- /* fallthrough */
- case 1:
- goto return_reval;
- }
- err = do_lookup(nd, &this, &next, &inode);
- if (err)
- break;
- if (inode && unlikely(inode->i_op->follow_link) &&
- (lookup_flags & LOOKUP_FOLLOW)) {
- err = do_follow_link(inode, &next, nd);
- if (err)
- goto return_err;
- nd->inode = nd->path.dentry->d_inode;
- } else {
- path_to_nameidata(&next, nd);
- nd->inode = inode;
- }
- err = -ENOENT;
- if (!nd->inode)
- break;
- if (lookup_flags & LOOKUP_DIRECTORY) {
- err = -ENOTDIR;
- if (!nd->inode->i_op->lookup)
- break;
- }
- goto return_base;
-lookup_parent:
nd->last = this;
- nd->last_type = LAST_NORM;
- if (this.name[0] != '.')
- goto return_base;
- if (this.len == 1)
- nd->last_type = LAST_DOT;
- else if (this.len == 2 && this.name[1] == '.')
- nd->last_type = LAST_DOTDOT;
- else
- goto return_base;
-return_reval:
- /*
- * We bypassed the ordinary revalidation routines.
- * We may need to check the cached dentry for staleness.
- */
- if (need_reval_dot(nd->path.dentry)) {
- if (nameidata_drop_rcu_last_maybe(nd))
- return -ECHILD;
- /* Note: we do not d_invalidate() */
- err = d_revalidate(nd->path.dentry, nd);
- if (!err)
- err = -ESTALE;
- if (err < 0)
- break;
- return 0;
- }
-return_base:
- if (nameidata_drop_rcu_last_maybe(nd))
- return -ECHILD;
+ nd->last_type = type;
return 0;
-out_dput:
- if (!(nd->flags & LOOKUP_RCU))
- path_put_conditional(&next, nd);
- break;
}
- if (!(nd->flags & LOOKUP_RCU))
- path_put(&nd->path);
-return_err:
+ terminate_walk(nd);
return err;
}
-static inline int path_walk_rcu(const char *name, struct nameidata *nd)
-{
- current->total_link_count = 0;
-
- return link_path_walk(name, nd);
-}
-
-static inline int path_walk_simple(const char *name, struct nameidata *nd)
-{
- current->total_link_count = 0;
-
- return link_path_walk(name, nd);
-}
-
-static int path_walk(const char *name, struct nameidata *nd)
-{
- struct path save = nd->path;
- int result;
-
- current->total_link_count = 0;
-
- /* make sure the stuff we saved doesn't go away */
- path_get(&save);
-
- result = link_path_walk(name, nd);
- if (result == -ESTALE) {
- /* nd->path had been dropped */
- current->total_link_count = 0;
- nd->path = save;
- path_get(&nd->path);
- nd->flags |= LOOKUP_REVAL;
- result = link_path_walk(name, nd);
- }
-
- path_put(&save);
-
- return result;
-}
-
-static void path_finish_rcu(struct nameidata *nd)
-{
- if (nd->flags & LOOKUP_RCU) {
- /* RCU dangling. Cancel it. */
- nd->flags &= ~LOOKUP_RCU;
- nd->root.mnt = NULL;
- rcu_read_unlock();
- br_read_unlock(vfsmount_lock);
- }
- if (nd->file)
- fput(nd->file);
-}
-
-static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
+static int path_init(int dfd, const char *name, unsigned int flags,
+ struct nameidata *nd, struct file **fp)
{
int retval = 0;
int fput_needed;
struct file *file;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
- nd->flags = flags | LOOKUP_RCU;
+ nd->flags = flags | LOOKUP_JUMPED;
nd->depth = 0;
+ if (flags & LOOKUP_ROOT) {
+ struct inode *inode = nd->root.dentry->d_inode;
+ if (*name) {
+ if (!inode->i_op->lookup)
+ return -ENOTDIR;
+ retval = inode_permission(inode, MAY_EXEC);
+ if (retval)
+ return retval;
+ }
+ nd->path = nd->root;
+ nd->inode = inode;
+ if (flags & LOOKUP_RCU) {
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ } else {
+ path_get(&nd->path);
+ }
+ return 0;
+ }
+
nd->root.mnt = NULL;
- nd->file = NULL;
if (*name=='/') {
- struct fs_struct *fs = current->fs;
- unsigned seq;
-
- br_read_lock(vfsmount_lock);
- rcu_read_lock();
-
- do {
- seq = read_seqcount_begin(&fs->seq);
- nd->root = fs->root;
- nd->path = nd->root;
- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- } while (read_seqcount_retry(&fs->seq, seq));
-
+ if (flags & LOOKUP_RCU) {
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ set_root_rcu(nd);
+ } else {
+ set_root(nd);
+ path_get(&nd->root);
+ }
+ nd->path = nd->root;
} else if (dfd == AT_FDCWD) {
- struct fs_struct *fs = current->fs;
- unsigned seq;
-
- br_read_lock(vfsmount_lock);
- rcu_read_lock();
+ if (flags & LOOKUP_RCU) {
+ struct fs_struct *fs = current->fs;
+ unsigned seq;
- do {
- seq = read_seqcount_begin(&fs->seq);
- nd->path = fs->pwd;
- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- } while (read_seqcount_retry(&fs->seq, seq));
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ nd->path = fs->pwd;
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ } while (read_seqcount_retry(&fs->seq, seq));
+ } else {
+ get_fs_pwd(current->fs, &nd->path);
+ }
} else {
struct dentry *dentry;
- file = fget_light(dfd, &fput_needed);
+ file = fget_raw_light(dfd, &fput_needed);
retval = -EBADF;
if (!file)
goto out_fail;
dentry = file->f_path.dentry;
- retval = -ENOTDIR;
- if (!S_ISDIR(dentry->d_inode->i_mode))
- goto fput_fail;
+ if (*name) {
+ retval = -ENOTDIR;
+ if (!S_ISDIR(dentry->d_inode->i_mode))
+ goto fput_fail;
- retval = file_permission(file, MAY_EXEC);
- if (retval)
- goto fput_fail;
+ retval = file_permission(file, MAY_EXEC);
+ if (retval)
+ goto fput_fail;
+ }
nd->path = file->f_path;
- if (fput_needed)
- nd->file = file;
-
- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- br_read_lock(vfsmount_lock);
- rcu_read_lock();
+ if (flags & LOOKUP_RCU) {
+ if (fput_needed)
+ *fp = file;
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ } else {
+ path_get(&file->f_path);
+ fput_light(file, fput_needed);
+ }
}
+
nd->inode = nd->path.dentry->d_inode;
return 0;
@@ -1643,60 +1585,23 @@ out_fail:
return retval;
}
-static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
+static inline int lookup_last(struct nameidata *nd, struct path *path)
{
- int retval = 0;
- int fput_needed;
- struct file *file;
-
- nd->last_type = LAST_ROOT; /* if there are only slashes... */
- nd->flags = flags;
- nd->depth = 0;
- nd->root.mnt = NULL;
-
- if (*name=='/') {
- set_root(nd);
- nd->path = nd->root;
- path_get(&nd->root);
- } else if (dfd == AT_FDCWD) {
- get_fs_pwd(current->fs, &nd->path);
- } else {
- struct dentry *dentry;
-
- file = fget_light(dfd, &fput_needed);
- retval = -EBADF;
- if (!file)
- goto out_fail;
-
- dentry = file->f_path.dentry;
-
- retval = -ENOTDIR;
- if (!S_ISDIR(dentry->d_inode->i_mode))
- goto fput_fail;
+ if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
+ nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
- retval = file_permission(file, MAY_EXEC);
- if (retval)
- goto fput_fail;
-
- nd->path = file->f_path;
- path_get(&file->f_path);
-
- fput_light(file, fput_needed);
- }
- nd->inode = nd->path.dentry->d_inode;
- return 0;
-
-fput_fail:
- fput_light(file, fput_needed);
-out_fail:
- return retval;
+ nd->flags &= ~LOOKUP_PARENT;
+ return walk_component(nd, path, &nd->last, nd->last_type,
+ nd->flags & LOOKUP_FOLLOW);
}
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int do_path_lookup(int dfd, const char *name,
+static int path_lookupat(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
- int retval;
+ struct file *base = NULL;
+ struct path path;
+ int err;
/*
* Path walking is largely split up into 2 different synchronisation
@@ -1712,44 +1617,75 @@ static int do_path_lookup(int dfd, const char *name,
* be handled by restarting a traditional ref-walk (which will always
* be able to complete).
*/
- retval = path_init_rcu(dfd, name, flags, nd);
- if (unlikely(retval))
- return retval;
- retval = path_walk_rcu(name, nd);
- path_finish_rcu(nd);
- if (nd->root.mnt) {
- path_put(&nd->root);
- nd->root.mnt = NULL;
+ err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
+
+ if (unlikely(err))
+ return err;
+
+ current->total_link_count = 0;
+ err = link_path_walk(name, nd);
+
+ if (!err && !(flags & LOOKUP_PARENT)) {
+ err = lookup_last(nd, &path);
+ while (err > 0) {
+ void *cookie;
+ struct path link = path;
+ nd->flags |= LOOKUP_PARENT;
+ err = follow_link(&link, nd, &cookie);
+ if (!err)
+ err = lookup_last(nd, &path);
+ put_link(nd, &link, cookie);
+ }
}
- if (unlikely(retval == -ECHILD || retval == -ESTALE)) {
- /* slower, locked walk */
- if (retval == -ESTALE)
- flags |= LOOKUP_REVAL;
- retval = path_init(dfd, name, flags, nd);
- if (unlikely(retval))
- return retval;
- retval = path_walk(name, nd);
- if (nd->root.mnt) {
- path_put(&nd->root);
- nd->root.mnt = NULL;
+ if (nd->flags & LOOKUP_RCU) {
+ /* went all way through without dropping RCU */
+ BUG_ON(err);
+ if (nameidata_drop_rcu_last(nd))
+ err = -ECHILD;
+ }
+
+ if (!err)
+ err = handle_reval_path(nd);
+
+ if (!err && nd->flags & LOOKUP_DIRECTORY) {
+ if (!nd->inode->i_op->lookup) {
+ path_put(&nd->path);
+ return -ENOTDIR;
}
}
+ if (base)
+ fput(base);
+
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ path_put(&nd->root);
+ nd->root.mnt = NULL;
+ }
+ return err;
+}
+
+static int do_path_lookup(int dfd, const char *name,
+ unsigned int flags, struct nameidata *nd)
+{
+ int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
+ if (unlikely(retval == -ECHILD))
+ retval = path_lookupat(dfd, name, flags, nd);
+ if (unlikely(retval == -ESTALE))
+ retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
+
if (likely(!retval)) {
if (unlikely(!audit_dummy_context())) {
if (nd->path.dentry && nd->inode)
audit_inode(name, nd->path.dentry);
}
}
-
return retval;
}
-int path_lookup(const char *name, unsigned int flags,
- struct nameidata *nd)
+int kern_path_parent(const char *name, struct nameidata *nd)
{
- return do_path_lookup(AT_FDCWD, name, flags, nd);
+ return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
}
int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1773,29 +1709,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
const char *name, unsigned int flags,
struct nameidata *nd)
{
- int retval;
-
- /* same as do_path_lookup */
- nd->last_type = LAST_ROOT;
- nd->flags = flags;
- nd->depth = 0;
-
- nd->path.dentry = dentry;
- nd->path.mnt = mnt;
- path_get(&nd->path);
- nd->root = nd->path;
- path_get(&nd->root);
- nd->inode = nd->path.dentry->d_inode;
-
- retval = path_walk(name, nd);
- if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
- nd->inode))
- audit_inode(name, nd->path.dentry);
-
- path_put(&nd->root);
- nd->root.mnt = NULL;
-
- return retval;
+ nd->root.dentry = dentry;
+ nd->root.mnt = mnt;
+ /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
+ return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
}
static struct dentry *__lookup_hash(struct qstr *name,
@@ -1810,17 +1727,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
return ERR_PTR(err);
/*
- * See if the low-level filesystem might want
- * to use its own hash..
- */
- if (base->d_flags & DCACHE_OP_HASH) {
- err = base->d_op->d_hash(base, inode, name);
- dentry = ERR_PTR(err);
- if (err < 0)
- goto out;
- }
-
- /*
* Don't bother with __d_lookup: callers are for creat as
* well as unlink, so a lot of the time it would cost
* a double lookup.
@@ -1832,7 +1738,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
if (!dentry)
dentry = d_alloc_and_lookup(base, name, nd);
-out:
+
return dentry;
}
@@ -1846,28 +1752,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
return __lookup_hash(&nd->last, nd->path.dentry, nd);
}
-static int __lookup_one_len(const char *name, struct qstr *this,
- struct dentry *base, int len)
-{
- unsigned long hash;
- unsigned int c;
-
- this->name = name;
- this->len = len;
- if (!len)
- return -EACCES;
-
- hash = init_name_hash();
- while (len--) {
- c = *(const unsigned char *)name++;
- if (c == '/' || c == '\0')
- return -EACCES;
- hash = partial_name_hash(c, hash);
- }
- this->hash = end_name_hash(hash);
- return 0;
-}
-
/**
* lookup_one_len - filesystem helper to lookup single pathname component
* @name: pathname component to lookup
@@ -1881,14 +1765,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
*/
struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
{
- int err;
struct qstr this;
+ unsigned long hash;
+ unsigned int c;
WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
- err = __lookup_one_len(name, &this, base, len);
- if (err)
- return ERR_PTR(err);
+ this.name = name;
+ this.len = len;
+ if (!len)
+ return ERR_PTR(-EACCES);
+
+ hash = init_name_hash();
+ while (len--) {
+ c = *(const unsigned char *)name++;
+ if (c == '/' || c == '\0')
+ return ERR_PTR(-EACCES);
+ hash = partial_name_hash(c, hash);
+ }
+ this.hash = end_name_hash(hash);
+ /*
+ * See if the low-level filesystem might want
+ * to use its own hash..
+ */
+ if (base->d_flags & DCACHE_OP_HASH) {
+ int err = base->d_op->d_hash(base, base->d_inode, &this);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
return __lookup_hash(&this, base, NULL);
}
@@ -1897,7 +1801,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
struct path *path)
{
struct nameidata nd;
- char *tmp = getname(name);
+ char *tmp = getname_flags(name, flags);
int err = PTR_ERR(tmp);
if (!IS_ERR(tmp)) {
@@ -2077,12 +1981,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
return error;
}
-int may_open(struct path *path, int acc_mode, int flag)
+static int may_open(struct path *path, int acc_mode, int flag)
{
struct dentry *dentry = path->dentry;
struct inode *inode = dentry->d_inode;
int error;
+ /* O_PATH? */
+ if (!acc_mode)
+ return 0;
+
if (!inode)
return -ENOENT;
@@ -2151,34 +2059,6 @@ static int handle_truncate(struct file *filp)
}
/*
- * Be careful about ever adding any more callers of this
- * function. Its flags must be in the namei format, not
- * what get passed to sys_open().
- */
-static int __open_namei_create(struct nameidata *nd, struct path *path,
- int open_flag, int mode)
-{
- int error;
- struct dentry *dir = nd->path.dentry;
-
- if (!IS_POSIXACL(dir->d_inode))
- mode &= ~current_umask();
- error = security_path_mknod(&nd->path, path->dentry, mode, 0);
- if (error)
- goto out_unlock;
- error = vfs_create(dir->d_inode, path->dentry, mode, nd);
-out_unlock:
- mutex_unlock(&dir->d_inode->i_mutex);
- dput(nd->path.dentry);
- nd->path.dentry = path->dentry;
-
- if (error)
- return error;
- /* Don't check for write permission, don't truncate */
- return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
-}
-
-/*
* Note that while the flag value (low two bits) for sys_open means:
* 00 - read-only
* 01 - write-only
@@ -2202,126 +2082,115 @@ static inline int open_to_namei_flags(int flag)
return flag;
}
-static int open_will_truncate(int flag, struct inode *inode)
-{
- /*
- * We'll never write to the fs underlying
- * a device file.
- */
- if (special_file(inode->i_mode))
- return 0;
- return (flag & O_TRUNC);
-}
-
-static struct file *finish_open(struct nameidata *nd,
- int open_flag, int acc_mode)
-{
- struct file *filp;
- int will_truncate;
- int error;
-
- will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
- if (will_truncate) {
- error = mnt_want_write(nd->path.mnt);
- if (error)
- goto exit;
- }
- error = may_open(&nd->path, acc_mode, open_flag);
- if (error) {
- if (will_truncate)
- mnt_drop_write(nd->path.mnt);
- goto exit;
- }
- filp = nameidata_to_filp(nd);
- if (!IS_ERR(filp)) {
- error = ima_file_check(filp, acc_mode);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
- if (!IS_ERR(filp)) {
- if (will_truncate) {
- error = handle_truncate(filp);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
- }
- /*
- * It is now safe to drop the mnt write
- * because the filp has had a write taken
- * on its behalf.
- */
- if (will_truncate)
- mnt_drop_write(nd->path.mnt);
- path_put(&nd->path);
- return filp;
-
-exit:
- path_put(&nd->path);
- return ERR_PTR(error);
-}
-
/*
- * Handle O_CREAT case for do_filp_open
+ * Handle the last step of open()
*/
static struct file *do_last(struct nameidata *nd, struct path *path,
- int open_flag, int acc_mode,
- int mode, const char *pathname)
+ const struct open_flags *op, const char *pathname)
{
struct dentry *dir = nd->path.dentry;
+ struct dentry *dentry;
+ int open_flag = op->open_flag;
+ int will_truncate = open_flag & O_TRUNC;
+ int want_write = 0;
+ int acc_mode = op->acc_mode;
struct file *filp;
- int error = -EISDIR;
+ int error;
+
+ nd->flags &= ~LOOKUP_PARENT;
+ nd->flags |= op->intent;
switch (nd->last_type) {
case LAST_DOTDOT:
- follow_dotdot(nd);
- dir = nd->path.dentry;
case LAST_DOT:
- if (need_reval_dot(dir)) {
- int status = d_revalidate(nd->path.dentry, nd);
- if (!status)
- status = -ESTALE;
- if (status < 0) {
- error = status;
- goto exit;
- }
- }
+ error = handle_dots(nd, nd->last_type);
+ if (error)
+ return ERR_PTR(error);
/* fallthrough */
case LAST_ROOT:
- goto exit;
+ if (nd->flags & LOOKUP_RCU) {
+ if (nameidata_drop_rcu_last(nd))
+ return ERR_PTR(-ECHILD);
+ }
+ error = handle_reval_path(nd);
+ if (error)
+ goto exit;
+ audit_inode(pathname, nd->path.dentry);
+ if (open_flag & O_CREAT) {
+ error = -EISDIR;
+ goto exit;
+ }
+ goto ok;
case LAST_BIND:
+ /* can't be RCU mode here */
+ error = handle_reval_path(nd);
+ if (error)
+ goto exit;
audit_inode(pathname, dir);
goto ok;
}
+ if (!(open_flag & O_CREAT)) {
+ int symlink_ok = 0;
+ if (nd->last.name[nd->last.len])
+ nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+ if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
+ symlink_ok = 1;
+ /* we _can_ be in RCU mode here */
+ error = walk_component(nd, path, &nd->last, LAST_NORM,
+ !symlink_ok);
+ if (error < 0)
+ return ERR_PTR(error);
+ if (error) /* symlink */
+ return NULL;
+ /* sayonara */
+ if (nd->flags & LOOKUP_RCU) {
+ if (nameidata_drop_rcu_last(nd))
+ return ERR_PTR(-ECHILD);
+ }
+
+ error = -ENOTDIR;
+ if (nd->flags & LOOKUP_DIRECTORY) {
+ if (!nd->inode->i_op->lookup)
+ goto exit;
+ }
+ audit_inode(pathname, nd->path.dentry);
+ goto ok;
+ }
+
+ /* create side of things */
+
+ if (nd->flags & LOOKUP_RCU) {
+ if (nameidata_drop_rcu_last(nd))
+ return ERR_PTR(-ECHILD);
+ }
+
+ audit_inode(pathname, dir);
+ error = -EISDIR;
/* trailing slashes? */
if (nd->last.name[nd->last.len])
goto exit;
mutex_lock(&dir->d_inode->i_mutex);
- path->dentry = lookup_hash(nd);
- path->mnt = nd->path.mnt;
-
- error = PTR_ERR(path->dentry);
- if (IS_ERR(path->dentry)) {
+ dentry = lookup_hash(nd);
+ error = PTR_ERR(dentry);
+ if (IS_ERR(dentry)) {
mutex_unlock(&dir->d_inode->i_mutex);
goto exit;
}
- if (IS_ERR(nd->intent.open.file)) {
- error = PTR_ERR(nd->intent.open.file);
- goto exit_mutex_unlock;
- }
+ path->dentry = dentry;
+ path->mnt = nd->path.mnt;
/* Negative dentry, just create the file */
- if (!path->dentry->d_inode) {
+ if (!dentry->d_inode) {
+ int mode = op->mode;
+ if (!IS_POSIXACL(dir->d_inode))
+ mode &= ~current_umask();
/*
* This write is needed to ensure that a
- * ro->rw transition does not occur between
+ * rw->ro transition does not occur between
* the time when the file is created and when
* a permanent write count is taken through
* the 'struct file' in nameidata_to_filp().
@@ -2329,22 +2198,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
error = mnt_want_write(nd->path.mnt);
if (error)
goto exit_mutex_unlock;
- error = __open_namei_create(nd, path, open_flag, mode);
- if (error) {
- mnt_drop_write(nd->path.mnt);
- goto exit;
- }
- filp = nameidata_to_filp(nd);
- mnt_drop_write(nd->path.mnt);
- path_put(&nd->path);
- if (!IS_ERR(filp)) {
- error = ima_file_check(filp, acc_mode);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
- return filp;
+ want_write = 1;
+ /* Don't check for write permission, don't truncate */
+ open_flag &= ~O_TRUNC;
+ will_truncate = 0;
+ acc_mode = MAY_OPEN;
+ error = security_path_mknod(&nd->path, dentry, mode, 0);
+ if (error)
+ goto exit_mutex_unlock;
+ error = vfs_create(dir->d_inode, dentry, mode, nd);
+ if (error)
+ goto exit_mutex_unlock;
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(nd->path.dentry);
+ nd->path.dentry = dentry;
+ goto common;
}
/*
@@ -2374,7 +2242,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (S_ISDIR(nd->inode->i_mode))
goto exit;
ok:
- filp = finish_open(nd, open_flag, acc_mode);
+ if (!S_ISREG(nd->inode->i_mode))
+ will_truncate = 0;
+
+ if (will_truncate) {
+ error = mnt_want_write(nd->path.mnt);
+ if (error)
+ goto exit;
+ want_write = 1;
+ }
+common:
+ error = may_open(&nd->path, acc_mode, open_flag);
+ if (error)
+ goto exit;
+ filp = nameidata_to_filp(nd);
+ if (!IS_ERR(filp)) {
+ error = ima_file_check(filp, op->acc_mode);
+ if (error) {
+ fput(filp);
+ filp = ERR_PTR(error);
+ }
+ }
+ if (!IS_ERR(filp)) {
+ if (will_truncate) {
+ error = handle_truncate(filp);
+ if (error) {
+ fput(filp);
+ filp = ERR_PTR(error);
+ }
+ }
+ }
+out:
+ if (want_write)
+ mnt_drop_write(nd->path.mnt);
+ path_put(&nd->path);
return filp;
exit_mutex_unlock:
@@ -2382,197 +2283,103 @@ exit_mutex_unlock:
exit_dput:
path_put_conditional(path, nd);
exit:
- path_put(&nd->path);
- return ERR_PTR(error);
+ filp = ERR_PTR(error);
+ goto out;
}
-/*
- * Note that the low bits of the passed in "open_flag"
- * are not the same as in the local variable "flag". See
- * open_to_namei_flags() for more details.
- */
-struct file *do_filp_open(int dfd, const char *pathname,
- int open_flag, int mode, int acc_mode)
+static struct file *path_openat(int dfd, const char *pathname,
+ struct nameidata *nd, const struct open_flags *op, int flags)
{
+ struct file *base = NULL;
struct file *filp;
- struct nameidata nd;
- int error;
struct path path;
- int count = 0;
- int flag = open_to_namei_flags(open_flag);
- int flags;
-
- if (!(open_flag & O_CREAT))
- mode = 0;
-
- /* Must never be set by userspace */
- open_flag &= ~FMODE_NONOTIFY;
-
- /*
- * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
- * check for O_DSYNC if the need any syncing at all we enforce it's
- * always set instead of having to deal with possibly weird behaviour
- * for malicious applications setting only __O_SYNC.
- */
- if (open_flag & __O_SYNC)
- open_flag |= O_DSYNC;
-
- if (!acc_mode)
- acc_mode = MAY_OPEN | ACC_MODE(open_flag);
-
- /* O_TRUNC implies we need access checks for write permissions */
- if (open_flag & O_TRUNC)
- acc_mode |= MAY_WRITE;
-
- /* Allow the LSM permission hook to distinguish append
- access from general write access. */
- if (open_flag & O_APPEND)
- acc_mode |= MAY_APPEND;
-
- flags = LOOKUP_OPEN;
- if (open_flag & O_CREAT) {
- flags |= LOOKUP_CREATE;
- if (open_flag & O_EXCL)
- flags |= LOOKUP_EXCL;
- }
- if (open_flag & O_DIRECTORY)
- flags |= LOOKUP_DIRECTORY;
- if (!(open_flag & O_NOFOLLOW))
- flags |= LOOKUP_FOLLOW;
+ int error;
filp = get_empty_filp();
if (!filp)
return ERR_PTR(-ENFILE);
- filp->f_flags = open_flag;
- nd.intent.open.file = filp;
- nd.intent.open.flags = flag;
- nd.intent.open.create_mode = mode;
-
- if (open_flag & O_CREAT)
- goto creat;
+ filp->f_flags = op->open_flag;
+ nd->intent.open.file = filp;
+ nd->intent.open.flags = open_to_namei_flags(op->open_flag);
+ nd->intent.open.create_mode = op->mode;
- /* !O_CREAT, simple open */
- error = do_path_lookup(dfd, pathname, flags, &nd);
+ error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
if (unlikely(error))
goto out_filp;
- error = -ELOOP;
- if (!(nd.flags & LOOKUP_FOLLOW)) {
- if (nd.inode->i_op->follow_link)
- goto out_path;
- }
- error = -ENOTDIR;
- if (nd.flags & LOOKUP_DIRECTORY) {
- if (!nd.inode->i_op->lookup)
- goto out_path;
- }
- audit_inode(pathname, nd.path.dentry);
- filp = finish_open(&nd, open_flag, acc_mode);
- release_open_intent(&nd);
- return filp;
-creat:
- /* OK, have to create the file. Find the parent. */
- error = path_init_rcu(dfd, pathname,
- LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
- if (error)
- goto out_filp;
- error = path_walk_rcu(pathname, &nd);
- path_finish_rcu(&nd);
- if (unlikely(error == -ECHILD || error == -ESTALE)) {
- /* slower, locked walk */
- if (error == -ESTALE) {
-reval:
- flags |= LOOKUP_REVAL;
- }
- error = path_init(dfd, pathname,
- LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
- if (error)
- goto out_filp;
-
- error = path_walk_simple(pathname, &nd);
- }
+ current->total_link_count = 0;
+ error = link_path_walk(pathname, nd);
if (unlikely(error))
goto out_filp;
- if (unlikely(!audit_dummy_context()))
- audit_inode(pathname, nd.path.dentry);
- /*
- * We have the parent and last component.
- */
- nd.flags = flags;
- filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
+ filp = do_last(nd, &path, op, pathname);
while (unlikely(!filp)) { /* trailing symlink */
struct path link = path;
- struct inode *linki = link.dentry->d_inode;
void *cookie;
- error = -ELOOP;
- if (!(nd.flags & LOOKUP_FOLLOW))
- goto exit_dput;
- if (count++ == 32)
- goto exit_dput;
- /*
- * This is subtle. Instead of calling do_follow_link() we do
- * the thing by hands. The reason is that this way we have zero
- * link_count and path_walk() (called from ->follow_link)
- * honoring LOOKUP_PARENT. After that we have the parent and
- * last component, i.e. we are in the same situation as after
- * the first path_walk(). Well, almost - if the last component
- * is normal we get its copy stored in nd->last.name and we will
- * have to putname() it when we are done. Procfs-like symlinks
- * just set LAST_BIND.
- */
- nd.flags |= LOOKUP_PARENT;
- error = security_inode_follow_link(link.dentry, &nd);
- if (error)
- goto exit_dput;
- error = __do_follow_link(&link, &nd, &cookie);
- if (unlikely(error)) {
- if (!IS_ERR(cookie) && linki->i_op->put_link)
- linki->i_op->put_link(link.dentry, &nd, cookie);
- /* nd.path had been dropped */
- nd.path = link;
- goto out_path;
+ if (!(nd->flags & LOOKUP_FOLLOW)) {
+ path_put_conditional(&path, nd);
+ path_put(&nd->path);
+ filp = ERR_PTR(-ELOOP);
+ break;
}
- nd.flags &= ~LOOKUP_PARENT;
- filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
- if (linki->i_op->put_link)
- linki->i_op->put_link(link.dentry, &nd, cookie);
- path_put(&link);
+ nd->flags |= LOOKUP_PARENT;
+ nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
+ error = follow_link(&link, nd, &cookie);
+ if (unlikely(error))
+ filp = ERR_PTR(error);
+ else
+ filp = do_last(nd, &path, op, pathname);
+ put_link(nd, &link, cookie);
}
out:
- if (nd.root.mnt)
- path_put(&nd.root);
- if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
- goto reval;
- release_open_intent(&nd);
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
+ path_put(&nd->root);
+ if (base)
+ fput(base);
+ release_open_intent(nd);
return filp;
-exit_dput:
- path_put_conditional(&path, &nd);
-out_path:
- path_put(&nd.path);
out_filp:
filp = ERR_PTR(error);
goto out;
}
-/**
- * filp_open - open file and return file pointer
- *
- * @filename: path to open
- * @flags: open flags as per the open(2) second argument
- * @mode: mode for the new file if O_CREAT is set, else ignored
- *
- * This is the helper to open a file from kernelspace if you really
- * have to. But in generally you should not do this, so please move
- * along, nothing to see here..
- */
-struct file *filp_open(const char *filename, int flags, int mode)
+struct file *do_filp_open(int dfd, const char *pathname,
+ const struct open_flags *op, int flags)
+{
+ struct nameidata nd;
+ struct file *filp;
+
+ filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
+ if (unlikely(filp == ERR_PTR(-ECHILD)))
+ filp = path_openat(dfd, pathname, &nd, op, flags);
+ if (unlikely(filp == ERR_PTR(-ESTALE)))
+ filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
+ return filp;
+}
+
+struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
+ const char *name, const struct open_flags *op, int flags)
{
- return do_filp_open(AT_FDCWD, filename, flags, mode, 0);
+ struct nameidata nd;
+ struct file *file;
+
+ nd.root.mnt = mnt;
+ nd.root.dentry = dentry;
+
+ flags |= LOOKUP_ROOT;
+
+ if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
+ return ERR_PTR(-ELOOP);
+
+ file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
+ if (unlikely(file == ERR_PTR(-ECHILD)))
+ file = path_openat(-1, name, &nd, op, flags);
+ if (unlikely(file == ERR_PTR(-ESTALE)))
+ file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
+ return file;
}
-EXPORT_SYMBOL(filp_open);
/**
* lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -3111,7 +2918,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
return error;
mutex_lock(&inode->i_mutex);
- error = dir->i_op->link(old_dentry, dir, new_dentry);
+ /* Make sure we don't allow creating hardlink to an unlinked file */
+ if (inode->i_nlink == 0)
+ error = -ENOENT;
+ else
+ error = dir->i_op->link(old_dentry, dir, new_dentry);
mutex_unlock(&inode->i_mutex);
if (!error)
fsnotify_link(dir, inode, new_dentry);
@@ -3133,15 +2944,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
struct dentry *new_dentry;
struct nameidata nd;
struct path old_path;
+ int how = 0;
int error;
char *to;
- if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
+ if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
return -EINVAL;
+ /*
+ * To use null names we require CAP_DAC_READ_SEARCH
+ * This ensures that not everyone will be able to create
+ * handlink using the passed filedescriptor.
+ */
+ if (flags & AT_EMPTY_PATH) {
+ if (!capable(CAP_DAC_READ_SEARCH))
+ return -ENOENT;
+ how = LOOKUP_EMPTY;
+ }
+
+ if (flags & AT_SYMLINK_FOLLOW)
+ how |= LOOKUP_FOLLOW;
- error = user_path_at(olddfd, oldname,
- flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
- &old_path);
+ error = user_path_at(olddfd, oldname, how, &old_path);
if (error)
return error;
@@ -3578,7 +3401,7 @@ EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(__page_symlink);
EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
-EXPORT_SYMBOL(path_lookup);
+EXPORT_SYMBOL(kern_path_parent);
EXPORT_SYMBOL(kern_path);
EXPORT_SYMBOL(vfs_path_lookup);
EXPORT_SYMBOL(inode_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b0b9537169..e96e03782de 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1002,6 +1002,18 @@ const struct seq_operations mounts_op = {
.show = show_vfsmnt
};
+static int uuid_is_nil(u8 *uuid)
+{
+ int i;
+ u8 *cp = (u8 *)uuid;
+
+ for (i = 0; i < 16; i++) {
+ if (*cp++)
+ return 0;
+ }
+ return 1;
+}
+
static int show_mountinfo(struct seq_file *m, void *v)
{
struct proc_mounts *p = m->private;
@@ -1040,6 +1052,10 @@ static int show_mountinfo(struct seq_file *m, void *v)
if (IS_MNT_UNBINDABLE(mnt))
seq_puts(m, " unbindable");
+ if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
+ /* print the uuid */
+ seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
+
/* Filesystem specific data */
seq_puts(m, " - ");
show_type(m, sb);
@@ -1244,7 +1260,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
*/
br_write_lock(vfsmount_lock);
if (mnt_get_count(mnt) != 2) {
- br_write_lock(vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
return -EBUSY;
}
br_write_unlock(vfsmount_lock);
@@ -1767,6 +1783,10 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
if (path->dentry != path->mnt->mnt_root)
return -EINVAL;
+ err = security_sb_remount(sb, data);
+ if (err)
+ return err;
+
down_write(&sb->s_umount);
if (flags & MS_BIND)
err = change_mount_flags(path->mnt, flags);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cc600e77bb..01768e5e2c9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
#include <linux/inet.h>
#include <linux/nfs_xdr.h>
#include <linux/slab.h>
+#include <linux/compat.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
*/
u64 nfs_compat_user_ino64(u64 fileid)
{
- int ino;
+#ifdef CONFIG_COMPAT
+ compat_ulong_t ino;
+#else
+ unsigned long ino;
+#endif
if (enable_ino64)
return fileid;
@@ -1513,7 +1518,7 @@ static int nfsiod_start(void)
{
struct workqueue_struct *wq;
dprintk("RPC: creating workqueue nfsiod\n");
- wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0);
+ wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
if (wq == NULL)
return -ENOMEM;
nfsiod_workqueue = wq;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a747407314..1be36cf65bf 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -298,6 +298,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
#if defined(CONFIG_NFS_V4_1)
struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
+extern void nfs4_schedule_session_recovery(struct nfs4_session *);
+#else
+static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
+{
+}
#endif /* CONFIG_NFS_V4_1 */
extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +312,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
-extern void nfs4_schedule_state_recovery(struct nfs_client *);
+extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern void nfs4_schedule_state_manager(struct nfs_client *);
-extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
-extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
+extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f5c9b125e8c..b73c34375f6 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -219,6 +219,10 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
goto out_err;
}
buf = kmalloc(rlen + 1, GFP_KERNEL);
+ if (!buf) {
+ dprintk("%s: Not enough memory\n", __func__);
+ goto out_err;
+ }
buf[rlen] = '\0';
memcpy(buf, r_addr, rlen);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78936a8f40a..0a07e353a96 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -256,12 +256,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_state_mark_reclaim_nograce(clp, state);
- goto do_state_recovery;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
- goto do_state_recovery;
+ nfs4_schedule_lease_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -272,7 +273,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR: %d Reset session\n", __func__,
errorcode);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_session_recovery(clp->cl_session);
exception->retry = 1;
break;
#endif /* defined(CONFIG_NFS_V4_1) */
@@ -295,8 +296,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
}
/* We failed to handle the error */
return nfs4_map_errors(ret);
-do_state_recovery:
- nfs4_schedule_state_recovery(clp);
+wait_on_recovery:
ret = nfs4_wait_clnt_recover(clp);
if (ret == 0)
exception->retry = 1;
@@ -435,8 +435,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
clp = res->sr_session->clp;
do_renew_lease(clp, timestamp);
/* Check sequence flags */
- if (atomic_read(&clp->cl_count) > 1)
- nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
+ if (res->sr_status_flags != 0)
+ nfs4_schedule_lease_recovery(clp);
break;
case -NFS4ERR_DELAY:
/* The server detected a resend of the RPC call and
@@ -1255,14 +1255,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_state_recovery(
- server->nfs_client);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session);
goto out;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
/* Don't recall a delegation if it was lost */
- nfs4_schedule_state_recovery(server->nfs_client);
+ nfs4_schedule_lease_recovery(server->nfs_client);
goto out;
case -ERESTARTSYS:
/*
@@ -1271,7 +1270,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
*/
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+ nfs4_schedule_stateid_recovery(server, state);
case -EKEYEXPIRED:
/*
* User RPCSEC_GSS context has expired.
@@ -1587,7 +1586,7 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
!test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
break;
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
ret = -EIO;
}
return ret;
@@ -3178,7 +3177,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
if (task->tk_status < 0) {
/* Unless we're shutting down, schedule state recovery! */
if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
return;
}
do_renew_lease(clp, timestamp);
@@ -3252,6 +3251,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
}
}
+static int buf_to_pages_noslab(const void *buf, size_t buflen,
+ struct page **pages, unsigned int *pgbase)
+{
+ struct page *newpage, **spages;
+ int rc = 0;
+ size_t len;
+ spages = pages;
+
+ do {
+ len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
+ newpage = alloc_page(GFP_KERNEL);
+
+ if (newpage == NULL)
+ goto unwind;
+ memcpy(page_address(newpage), buf, len);
+ buf += len;
+ buflen -= len;
+ *pages++ = newpage;
+ rc++;
+ } while (buflen != 0);
+
+ return rc;
+
+unwind:
+ for(; rc > 0; rc--)
+ __free_page(spages[rc-1]);
+ return -ENOMEM;
+}
+
struct nfs4_cached_acl {
int cached;
size_t len;
@@ -3420,13 +3448,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
.rpc_argp = &arg,
.rpc_resp = &res,
};
- int ret;
+ int ret, i;
if (!nfs4_server_supports_acls(server))
return -EOPNOTSUPP;
+ i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
+ if (i < 0)
+ return i;
nfs_inode_return_delegation(inode);
- buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
+
+ /*
+ * Free each page after tx, so the only ref left is
+ * held by the network stack
+ */
+ for (; i > 0; i--)
+ put_page(pages[i-1]);
+
/*
* Acl update can result in inode attribute update.
* so mark the attribute cache invalid.
@@ -3464,12 +3502,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_state_mark_reclaim_nograce(clp, state);
- goto do_state_recovery;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
- goto do_state_recovery;
+ nfs4_schedule_lease_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -3480,7 +3519,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_session_recovery(clp->cl_session);
task->tk_status = 0;
return -EAGAIN;
#endif /* CONFIG_NFS_V4_1 */
@@ -3497,9 +3536,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
}
task->tk_status = nfs4_map_errors(task->tk_status);
return 0;
-do_state_recovery:
+wait_on_recovery:
rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
- nfs4_schedule_state_recovery(clp);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
task->tk_status = 0;
@@ -4110,7 +4148,7 @@ static void nfs4_lock_release(void *calldata)
task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
data->arg.lock_seqid);
if (!IS_ERR(task))
- rpc_put_task(task);
+ rpc_put_task_async(task);
dprintk("%s: cancelling lock!\n", __func__);
} else
nfs_free_seqid(data->arg.lock_seqid);
@@ -4134,23 +4172,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
{
- struct nfs_client *clp = server->nfs_client;
- struct nfs4_state *state = lsp->ls_state;
-
switch (error) {
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_EXPIRED:
+ lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
if (new_lock_owner != 0 ||
(lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
- nfs4_state_mark_reclaim_nograce(clp, state);
- lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ nfs4_schedule_stateid_recovery(server, lsp->ls_state);
break;
case -NFS4ERR_STALE_STATEID:
- if (new_lock_owner != 0 ||
- (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
- nfs4_state_mark_reclaim_reboot(clp, state);
lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ case -NFS4ERR_EXPIRED:
+ nfs4_schedule_lease_recovery(server->nfs_client);
};
}
@@ -4366,12 +4399,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_EXPIRED:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
+ nfs4_schedule_lease_recovery(server->nfs_client);
+ goto out;
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_state_recovery(server->nfs_client);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session);
goto out;
case -ERESTARTSYS:
/*
@@ -4381,7 +4416,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OPENMODE:
- nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+ nfs4_schedule_stateid_recovery(server, state);
err = 0;
goto out;
case -EKEYEXPIRED:
@@ -4988,10 +5023,20 @@ int nfs4_proc_create_session(struct nfs_client *clp)
int status;
unsigned *ptr;
struct nfs4_session *session = clp->cl_session;
+ long timeout = 0;
+ int err;
dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
- status = _nfs4_proc_create_session(clp);
+ do {
+ status = _nfs4_proc_create_session(clp);
+ if (status == -NFS4ERR_DELAY) {
+ err = nfs4_delay(clp->cl_rpcclient, &timeout);
+ if (err)
+ status = err;
+ }
+ } while (status == -NFS4ERR_DELAY);
+
if (status)
goto out;
@@ -5100,7 +5145,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return -EAGAIN;
default:
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
}
return 0;
}
@@ -5187,7 +5232,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
if (IS_ERR(task))
ret = PTR_ERR(task);
else
- rpc_put_task(task);
+ rpc_put_task_async(task);
dprintk("<-- %s status=%d\n", __func__, ret);
return ret;
}
@@ -5203,8 +5248,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
goto out;
}
ret = rpc_wait_for_completion_task(task);
- if (!ret)
+ if (!ret) {
+ struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
+
+ if (task->tk_status == 0)
+ nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
ret = task->tk_status;
+ }
rpc_put_task(task);
out:
dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5241,7 +5291,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return -EAGAIN;
default:
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
}
return 0;
}
@@ -5309,6 +5359,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
status = PTR_ERR(task);
goto out;
}
+ status = nfs4_wait_for_completion_rpc_task(task);
+ if (status == 0)
+ status = task->tk_status;
rpc_put_task(task);
return 0;
out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e6742b57a04..0592288f9f0 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1007,9 +1007,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
}
/*
- * Schedule a state recovery attempt
+ * Schedule a lease recovery attempt
*/
-void nfs4_schedule_state_recovery(struct nfs_client *clp)
+void nfs4_schedule_lease_recovery(struct nfs_client *clp)
{
if (!clp)
return;
@@ -1018,7 +1018,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
nfs4_schedule_state_manager(clp);
}
-int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
+static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1032,7 +1032,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
return 1;
}
-int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
+static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1041,6 +1041,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
return 1;
}
+void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ nfs4_state_mark_reclaim_nograce(clp, state);
+ nfs4_schedule_state_manager(clp);
+}
+
static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
{
struct inode *inode = state->inode;
@@ -1436,10 +1444,15 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
}
#ifdef CONFIG_NFS_V4_1
+void nfs4_schedule_session_recovery(struct nfs4_session *session)
+{
+ nfs4_schedule_lease_recovery(session->clp);
+}
+
void nfs41_handle_recall_slot(struct nfs_client *clp)
{
set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1447,7 +1460,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
clp->cl_boot_time = CURRENT_TIME;
nfs4_state_start_reclaim_nograce(clp);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
}
@@ -1455,7 +1468,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
{
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
nfs4_state_start_reclaim_reboot(clp);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
}
@@ -1475,7 +1488,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
{
nfs_expire_all_delegations(clp);
if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e2c168b6ee..94d50e86a12 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1660,7 +1660,7 @@ static void encode_create_session(struct xdr_stream *xdr,
p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
*p++ = cpu_to_be32(OP_CREATE_SESSION);
- p = xdr_encode_hyper(p, clp->cl_ex_clid);
+ p = xdr_encode_hyper(p, clp->cl_clientid);
*p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
*p++ = cpu_to_be32(args->flags); /*flags */
@@ -4694,7 +4694,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
goto out_overflow;
- xdr_decode_hyper(p, &clp->cl_ex_clid);
+ xdr_decode_hyper(p, &clp->cl_clientid);
p = xdr_inline_decode(xdr, 12);
if (unlikely(!p))
goto out_overflow;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a2002..c541093a5bf 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
/* Default path we try to mount. "%s" gets replaced by our IP address */
#define NFS_ROOT "/tftpboot/%s"
+/* Default NFSROOT mount options. */
+#define NFS_DEF_OPTIONS "udp"
+
/* Parameters passed from the kernel command line */
static char nfs_root_parms[256] __initdata = "";
/* Text-based mount options passed to super.c */
-static char nfs_root_options[256] __initdata = "";
+static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
/* Address of NFS server */
static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
}
static int __init root_nfs_cat(char *dest, const char *src,
- const size_t destlen)
+ const size_t destlen)
{
+ size_t len = strlen(dest);
+
+ if (len && dest[len - 1] != ',')
+ if (strlcat(dest, ",", destlen) > destlen)
+ return -1;
+
if (strlcat(dest, src, destlen) > destlen)
return -1;
return 0;
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
if (root_nfs_cat(nfs_root_options, incoming,
sizeof(nfs_root_options)))
return -1;
-
- /*
- * Possibly prepare for more options to be appended
- */
- if (nfs_root_options[0] != '\0' &&
- nfs_root_options[strlen(nfs_root_options)] != ',')
- if (root_nfs_cat(nfs_root_options, ",",
- sizeof(nfs_root_options)))
- return -1;
-
return 0;
}
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
*/
static int __init root_nfs_data(char *cmdline)
{
- char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
+ char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
int len, retval = -1;
char *tmp = NULL;
const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
* Append mandatory options for nfsroot so they override
* what has come before
*/
- snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4",
+ snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
&servaddr);
- if (root_nfs_cat(nfs_root_options, addr_option,
+ if (root_nfs_cat(nfs_root_options, mand_options,
sizeof(nfs_root_options)))
goto out_optionstoolong;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51acdd..6481d537d69 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -180,7 +180,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
task_setup_data.rpc_client = NFS_CLIENT(dir);
task = rpc_run_task(&task_setup_data);
if (!IS_ERR(task))
- rpc_put_task(task);
+ rpc_put_task_async(task);
return 1;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046c..42b92d7a9cc 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1292,6 +1292,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
+ if (how & FLUSH_SYNC)
+ rpc_wait_for_completion_task(task);
rpc_put_task(task);
return 0;
}
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index bf9cbd242dd..124e8fcb0dd 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -22,30 +22,17 @@
static struct file *do_open(char *name, int flags)
{
- struct nameidata nd;
struct vfsmount *mnt;
- int error;
+ struct file *file;
mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
if (IS_ERR(mnt))
return (struct file *)mnt;
- error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd);
- mntput(mnt); /* drop do_kern_mount reference */
- if (error)
- return ERR_PTR(error);
-
- if (flags == O_RDWR)
- error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
- else
- error = may_open(&nd.path, MAY_WRITE, flags);
+ file = file_open_root(mnt->mnt_root, mnt, name, flags);
- if (!error)
- return dentry_open(nd.path.dentry, nd.path.mnt, flags,
- current_cred());
-
- path_put(&nd.path);
- return ERR_PTR(error);
+ mntput(mnt); /* drop do_kern_mount reference */
+ return file;
}
static struct {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index cde36cb0f34..02eb4edf0ec 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -432,7 +432,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
*/
- p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4);
+ p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 54b60bfceb8..7b566ec14e1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2445,15 +2445,16 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
static struct nfs4_delegation *
find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
{
- struct nfs4_delegation *dp = NULL;
+ struct nfs4_delegation *dp;
spin_lock(&recall_lock);
- list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
- if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
- break;
- }
+ list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
+ if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) {
+ spin_unlock(&recall_lock);
+ return dp;
+ }
spin_unlock(&recall_lock);
- return dp;
+ return NULL;
}
int share_access_to_flags(u32 share_access)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1275b865507..615f0a9f060 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
u32 dummy;
char *machine_name;
- int i;
+ int i, j;
int nr_secflavs;
READ_BUF(16);
@@ -1215,7 +1215,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
READ_BUF(4);
READ32(dummy);
READ_BUF(dummy * 4);
- for (i = 0; i < dummy; ++i)
+ for (j = 0; j < dummy; ++j)
READ32(dummy);
break;
case RPC_AUTH_GSS:
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 388e9e8f528..85f7baa15f5 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -35,11 +35,6 @@
#include "btnode.h"
-void nilfs_btnode_cache_init_once(struct address_space *btnc)
-{
- nilfs_mapping_init_once(btnc);
-}
-
static const struct address_space_operations def_btnode_aops = {
.sync_page = block_sync_page,
};
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 79037494f1e..1b8ebd888c2 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
struct buffer_head *newbh;
};
-void nilfs_btnode_cache_init_once(struct address_space *);
void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
void nilfs_btnode_cache_clear(struct address_space *);
struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 6a0e2a189f6..a0babd2bff6 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -454,9 +454,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
struct backing_dev_info *bdi = inode->i_sb->s_bdi;
INIT_LIST_HEAD(&shadow->frozen_buffers);
- nilfs_mapping_init_once(&shadow->frozen_data);
+ address_space_init_once(&shadow->frozen_data);
nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
- nilfs_mapping_init_once(&shadow->frozen_btnodes);
+ address_space_init_once(&shadow->frozen_btnodes);
nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
mi->mi_shadow = shadow;
return 0;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 98034271cd0..161791d2645 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -397,7 +397,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
- inc_nlink(old_inode);
nilfs_set_link(new_dir, new_de, new_page, old_inode);
nilfs_mark_inode_dirty(new_dir);
new_inode->i_ctime = CURRENT_TIME;
@@ -411,13 +410,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= NILFS_LINK_MAX)
goto out_dir;
}
- inc_nlink(old_inode);
err = nilfs_add_link(new_dentry, old_inode);
- if (err) {
- drop_nlink(old_inode);
- nilfs_mark_inode_dirty(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de) {
inc_nlink(new_dir);
nilfs_mark_inode_dirty(new_dir);
@@ -431,7 +426,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode->i_ctime = CURRENT_TIME;
nilfs_delete_entry(old_de, old_page);
- drop_nlink(old_inode);
if (dir_de) {
nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0c432416cfe..a585b35fd6b 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
return nc;
}
-void nilfs_mapping_init_once(struct address_space *mapping)
-{
- memset(mapping, 0, sizeof(*mapping));
- INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
- spin_lock_init(&mapping->tree_lock);
- INIT_LIST_HEAD(&mapping->private_list);
- spin_lock_init(&mapping->private_lock);
-
- spin_lock_init(&mapping->i_mmap_lock);
- INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
- INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
-}
-
void nilfs_mapping_init(struct address_space *mapping,
struct backing_dev_info *bdi,
const struct address_space_operations *aops)
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 622df27cd89..2a00953ebd5 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
void nilfs_copy_back_pages(struct address_space *, struct address_space *);
void nilfs_clear_dirty_pages(struct address_space *);
-void nilfs_mapping_init_once(struct address_space *mapping);
void nilfs_mapping_init(struct address_space *mapping,
struct backing_dev_info *bdi,
const struct address_space_operations *aops);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55ebae5c7f3..2de9f636792 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -430,7 +430,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
nilfs_segctor_map_segsum_entry(
sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
- if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
+ if (NILFS_I(inode)->i_root &&
+ !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
/* skip finfo */
}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 58fd707174e..1673b3d9984 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1279,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
#ifdef CONFIG_NILFS_XATTR
init_rwsem(&ii->xattr_sem);
#endif
- nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
+ address_space_init_once(&ii->i_btnode_cache);
ii->i_bmap = &ii->i_bmap_data;
inode_init_once(&ii->vfs_inode);
}
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 6d80ecc7834..7eb90403fc8 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -56,7 +56,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
int ret = 0; /* if all else fails, just return false */
struct ocfs2_super *osb;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = dentry->d_inode;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5dbc3062b4f..254652a9b54 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -197,8 +197,12 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
dentry->d_name.len, dentry->d_name.name,
fh, len, connectable);
- if (len < 3 || (connectable && len < 6)) {
- mlog(ML_ERROR, "fh buffer is too small for encoding\n");
+ if (connectable && (len < 6)) {
+ *max_len = 6;
+ type = 255;
+ goto bail;
+ } else if (len < 3) {
+ *max_len = 3;
type = 255;
goto bail;
}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 43e56b97f9c..6180da1e37e 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -405,9 +405,9 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
ocfs2_quota_trans_credits(sb);
}
-/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
- * bitmap block for the new bit) dx_root update for free list */
-#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1)
+/* data block for new dir/symlink, allocation of directory block, dx_root
+ * update for free list */
+#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + OCFS2_SUBALLOC_ALLOC + 1)
static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
{
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 849fb4a2e81..d6c25d76b53 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -293,7 +293,7 @@ static int ocfs2_mknod(struct inode *dir,
}
/* get security xattr */
- status = ocfs2_init_security_get(inode, dir, &si);
+ status = ocfs2_init_security_get(inode, dir, &dentry->d_name, &si);
if (status) {
if (status == -EOPNOTSUPP)
si.enable = 0;
@@ -1665,7 +1665,7 @@ static int ocfs2_symlink(struct inode *dir,
}
/* get security xattr */
- status = ocfs2_init_security_get(inode, dir, &si);
+ status = ocfs2_init_security_get(inode, dir, &dentry->d_name, &si);
if (status) {
if (status == -EOPNOTSUPP)
si.enable = 0;
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 196fcb52d95..d5ab56cbe5c 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -114,7 +114,4 @@ int ocfs2_local_write_dquot(struct dquot *dquot);
extern const struct dquot_operations ocfs2_quota_operations;
extern struct quota_format_type ocfs2_quota_format;
-int ocfs2_quota_setup(void);
-void ocfs2_quota_shutdown(void);
-
#endif /* _OCFS2_QUOTA_H */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 4607923eb24..a73f6416648 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -63,8 +63,6 @@
* write to gf
*/
-static struct workqueue_struct *ocfs2_quota_wq = NULL;
-
static void qsync_work_fn(struct work_struct *work);
static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp)
@@ -400,8 +398,8 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
OCFS2_QBLK_RESERVED_SPACE;
oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
- queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
- msecs_to_jiffies(oinfo->dqi_syncms));
+ schedule_delayed_work(&oinfo->dqi_sync_work,
+ msecs_to_jiffies(oinfo->dqi_syncms));
out_err:
mlog_exit(status);
@@ -635,8 +633,8 @@ static void qsync_work_fn(struct work_struct *work)
struct super_block *sb = oinfo->dqi_gqinode->i_sb;
dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
- queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
- msecs_to_jiffies(oinfo->dqi_syncms));
+ schedule_delayed_work(&oinfo->dqi_sync_work,
+ msecs_to_jiffies(oinfo->dqi_syncms));
}
/*
@@ -923,20 +921,3 @@ const struct dquot_operations ocfs2_quota_operations = {
.alloc_dquot = ocfs2_alloc_dquot,
.destroy_dquot = ocfs2_destroy_dquot,
};
-
-int ocfs2_quota_setup(void)
-{
- ocfs2_quota_wq = create_workqueue("o2quot");
- if (!ocfs2_quota_wq)
- return -ENOMEM;
- return 0;
-}
-
-void ocfs2_quota_shutdown(void)
-{
- if (ocfs2_quota_wq) {
- flush_workqueue(ocfs2_quota_wq);
- destroy_workqueue(ocfs2_quota_wq);
- ocfs2_quota_wq = NULL;
- }
-}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index b5f9160e93e..c384d634872 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3228,7 +3228,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
u32 num_clusters, unsigned int e_flags)
{
int ret, delete, index, credits = 0;
- u32 new_bit, new_len;
+ u32 new_bit, new_len, orig_num_clusters;
unsigned int set_len;
struct ocfs2_super *osb = OCFS2_SB(sb);
handle_t *handle;
@@ -3261,6 +3261,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
goto out;
}
+ orig_num_clusters = num_clusters;
+
while (num_clusters) {
ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
p_cluster, num_clusters,
@@ -3348,7 +3350,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
* in write-back mode.
*/
if (context->get_clusters == ocfs2_di_get_clusters) {
- ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
+ ret = ocfs2_cow_sync_writeback(sb, context, cpos,
+ orig_num_clusters);
if (ret)
mlog_errno(ret);
}
@@ -4325,7 +4328,8 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
/* If the security isn't preserved, we need to re-initialize them. */
if (!preserve) {
- error = ocfs2_init_security_and_acl(dir, new_orphan_inode);
+ error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
+ &new_dentry->d_name);
if (error)
mlog_errno(error);
}
@@ -4376,7 +4380,7 @@ static int ocfs2_user_path_parent(const char __user *path,
if (IS_ERR(s))
return PTR_ERR(s);
- error = path_lookup(s, LOOKUP_PARENT, nd);
+ error = kern_path_parent(s, nd);
if (error)
putname(s);
else
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 38f986d2447..236ed1bdca2 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1316,7 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb,
struct mount_options *mopt,
int is_remount)
{
- int status;
+ int status, user_stack = 0;
char *p;
u32 tmp;
@@ -1459,6 +1459,15 @@ static int ocfs2_parse_options(struct super_block *sb,
memcpy(mopt->cluster_stack, args[0].from,
OCFS2_STACK_LABEL_LEN);
mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
+ /*
+ * Open code the memcmp here as we don't have
+ * an osb to pass to
+ * ocfs2_userspace_stack().
+ */
+ if (memcmp(mopt->cluster_stack,
+ OCFS2_CLASSIC_CLUSTER_STACK,
+ OCFS2_STACK_LABEL_LEN))
+ user_stack = 1;
break;
case Opt_inode64:
mopt->mount_opt |= OCFS2_MOUNT_INODE64;
@@ -1514,13 +1523,16 @@ static int ocfs2_parse_options(struct super_block *sb,
}
}
- /* Ensure only one heartbeat mode */
- tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
- OCFS2_MOUNT_HB_NONE);
- if (hweight32(tmp) != 1) {
- mlog(ML_ERROR, "Invalid heartbeat mount options\n");
- status = 0;
- goto bail;
+ if (user_stack == 0) {
+ /* Ensure only one heartbeat mode */
+ tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
+ OCFS2_MOUNT_HB_GLOBAL |
+ OCFS2_MOUNT_HB_NONE);
+ if (hweight32(tmp) != 1) {
+ mlog(ML_ERROR, "Invalid heartbeat mount options\n");
+ status = 0;
+ goto bail;
+ }
}
status = 1;
@@ -1645,16 +1657,11 @@ static int __init ocfs2_init(void)
mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
}
- status = ocfs2_quota_setup();
- if (status)
- goto leave;
-
ocfs2_set_locking_protocol();
status = register_quota_format(&ocfs2_quota_format);
leave:
if (status < 0) {
- ocfs2_quota_shutdown();
ocfs2_free_mem_caches();
exit_ocfs2_uptodate_cache();
}
@@ -1671,8 +1678,6 @@ static void __exit ocfs2_exit(void)
{
mlog_entry_void();
- ocfs2_quota_shutdown();
-
if (ocfs2_wq) {
flush_workqueue(ocfs2_wq);
destroy_workqueue(ocfs2_wq);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 67cd4391464..6bb602486c6 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7185,7 +7185,8 @@ out:
* must not hold any lock expect i_mutex.
*/
int ocfs2_init_security_and_acl(struct inode *dir,
- struct inode *inode)
+ struct inode *inode,
+ const struct qstr *qstr)
{
int ret = 0;
struct buffer_head *dir_bh = NULL;
@@ -7193,7 +7194,7 @@ int ocfs2_init_security_and_acl(struct inode *dir,
.enable = 1,
};
- ret = ocfs2_init_security_get(inode, dir, &si);
+ ret = ocfs2_init_security_get(inode, dir, qstr, &si);
if (!ret) {
ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
si.name, si.value, si.value_len,
@@ -7261,13 +7262,14 @@ static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
int ocfs2_init_security_get(struct inode *inode,
struct inode *dir,
+ const struct qstr *qstr,
struct ocfs2_security_xattr_info *si)
{
/* check whether ocfs2 support feature xattr */
if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
return -EOPNOTSUPP;
- return security_inode_init_security(inode, dir, &si->name, &si->value,
- &si->value_len);
+ return security_inode_init_security(inode, dir, qstr, &si->name,
+ &si->value, &si->value_len);
}
int ocfs2_init_security_set(handle_t *handle,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index aa64bb37a65..d63cfb72316 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -57,6 +57,7 @@ int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
struct ocfs2_dinode *di);
int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
int ocfs2_init_security_get(struct inode *, struct inode *,
+ const struct qstr *,
struct ocfs2_security_xattr_info *);
int ocfs2_init_security_set(handle_t *, struct inode *,
struct buffer_head *,
@@ -94,5 +95,6 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
struct buffer_head *new_bh,
bool preserve_security);
int ocfs2_init_security_and_acl(struct inode *dir,
- struct inode *inode);
+ struct inode *inode,
+ const struct qstr *qstr);
#endif /* OCFS2_XATTR_H */
diff --git a/fs/open.c b/fs/open.c
index 5a2c6ebc22b..f83ca80cc59 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -233,6 +233,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
+
+ /* It's not possible punch hole on append only file */
+ if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
+ return -EPERM;
+
+ if (IS_IMMUTABLE(inode))
+ return -EPERM;
+
/*
* Revalidate the write permissions, in case security policy has
* changed since the files were opened.
@@ -565,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
{
struct path path;
int error = -EINVAL;
- int follow;
+ int lookup_flags;
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
+ if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
goto out;
- follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
- error = user_path_at(dfd, filename, follow, &path);
+ lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+ if (flag & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+ error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
goto out;
error = mnt_want_write(path.mnt);
@@ -661,11 +671,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
int (*open)(struct inode *, struct file *),
const struct cred *cred)
{
+ static const struct file_operations empty_fops = {};
struct inode *inode;
int error;
f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
FMODE_PREAD | FMODE_PWRITE;
+
+ if (unlikely(f->f_flags & O_PATH))
+ f->f_mode = FMODE_PATH;
+
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = __get_file_write_access(inode, mnt);
@@ -679,9 +694,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
f->f_path.dentry = dentry;
f->f_path.mnt = mnt;
f->f_pos = 0;
- f->f_op = fops_get(inode->i_fop);
file_sb_list_add(f, inode->i_sb);
+ if (unlikely(f->f_mode & FMODE_PATH)) {
+ f->f_op = &empty_fops;
+ return f;
+ }
+
+ f->f_op = fops_get(inode->i_fop);
+
error = security_dentry_open(f, cred);
if (error)
goto cleanup_all;
@@ -693,7 +714,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
if (error)
goto cleanup_all;
}
- ima_counts_get(f);
+ if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+ i_readcount_inc(inode);
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
@@ -882,15 +904,110 @@ void fd_install(unsigned int fd, struct file *file)
EXPORT_SYMBOL(fd_install);
+static inline int build_open_flags(int flags, int mode, struct open_flags *op)
+{
+ int lookup_flags = 0;
+ int acc_mode;
+
+ if (!(flags & O_CREAT))
+ mode = 0;
+ op->mode = mode;
+
+ /* Must never be set by userspace */
+ flags &= ~FMODE_NONOTIFY;
+
+ /*
+ * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
+ * check for O_DSYNC if the need any syncing at all we enforce it's
+ * always set instead of having to deal with possibly weird behaviour
+ * for malicious applications setting only __O_SYNC.
+ */
+ if (flags & __O_SYNC)
+ flags |= O_DSYNC;
+
+ /*
+ * If we have O_PATH in the open flag. Then we
+ * cannot have anything other than the below set of flags
+ */
+ if (flags & O_PATH) {
+ flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
+ acc_mode = 0;
+ } else {
+ acc_mode = MAY_OPEN | ACC_MODE(flags);
+ }
+
+ op->open_flag = flags;
+
+ /* O_TRUNC implies we need access checks for write permissions */
+ if (flags & O_TRUNC)
+ acc_mode |= MAY_WRITE;
+
+ /* Allow the LSM permission hook to distinguish append
+ access from general write access. */
+ if (flags & O_APPEND)
+ acc_mode |= MAY_APPEND;
+
+ op->acc_mode = acc_mode;
+
+ op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
+
+ if (flags & O_CREAT) {
+ op->intent |= LOOKUP_CREATE;
+ if (flags & O_EXCL)
+ op->intent |= LOOKUP_EXCL;
+ }
+
+ if (flags & O_DIRECTORY)
+ lookup_flags |= LOOKUP_DIRECTORY;
+ if (!(flags & O_NOFOLLOW))
+ lookup_flags |= LOOKUP_FOLLOW;
+ return lookup_flags;
+}
+
+/**
+ * filp_open - open file and return file pointer
+ *
+ * @filename: path to open
+ * @flags: open flags as per the open(2) second argument
+ * @mode: mode for the new file if O_CREAT is set, else ignored
+ *
+ * This is the helper to open a file from kernelspace if you really
+ * have to. But in generally you should not do this, so please move
+ * along, nothing to see here..
+ */
+struct file *filp_open(const char *filename, int flags, int mode)
+{
+ struct open_flags op;
+ int lookup = build_open_flags(flags, mode, &op);
+ return do_filp_open(AT_FDCWD, filename, &op, lookup);
+}
+EXPORT_SYMBOL(filp_open);
+
+struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
+ const char *filename, int flags)
+{
+ struct open_flags op;
+ int lookup = build_open_flags(flags, 0, &op);
+ if (flags & O_CREAT)
+ return ERR_PTR(-EINVAL);
+ if (!filename && (flags & O_DIRECTORY))
+ if (!dentry->d_inode->i_op->lookup)
+ return ERR_PTR(-ENOTDIR);
+ return do_file_open_root(dentry, mnt, filename, &op, lookup);
+}
+EXPORT_SYMBOL(file_open_root);
+
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
+ struct open_flags op;
+ int lookup = build_open_flags(flags, mode, &op);
char *tmp = getname(filename);
int fd = PTR_ERR(tmp);
if (!IS_ERR(tmp)) {
fd = get_unused_fd_flags(flags);
if (fd >= 0) {
- struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);
+ struct file *f = do_filp_open(dfd, tmp, &op, lookup);
if (IS_ERR(f)) {
put_unused_fd(fd);
fd = PTR_ERR(f);
@@ -960,8 +1077,10 @@ int filp_close(struct file *filp, fl_owner_t id)
if (filp->f_op && filp->f_op->flush)
retval = filp->f_op->flush(filp, id);
- dnotify_flush(filp, id);
- locks_remove_posix(filp, id);
+ if (likely(!(filp->f_mode & FMODE_PATH))) {
+ dnotify_flush(filp, id);
+ locks_remove_posix(filp, id);
+ }
fput(filp);
return retval;
}
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 789c625c7aa..b10e3540d5b 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -251,6 +251,11 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
}
vm->vblk_size = get_unaligned_be32(data + 0x08);
+ if (vm->vblk_size == 0) {
+ ldm_error ("Illegal VBLK size");
+ return false;
+ }
+
vm->vblk_offset = get_unaligned_be32(data + 0x0C);
vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index 48cec7cbca1..764b86a0196 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,10 +10,13 @@
#include "check.h"
#include "osf.h"
+#define MAX_OSF_PARTITIONS 18
+
int osf_partition(struct parsed_partitions *state)
{
int i;
int slot = 1;
+ unsigned int npartitions;
Sector sect;
unsigned char *data;
struct disklabel {
@@ -45,7 +48,7 @@ int osf_partition(struct parsed_partitions *state)
u8 p_fstype;
u8 p_frag;
__le16 p_cpg;
- } d_partitions[8];
+ } d_partitions[MAX_OSF_PARTITIONS];
} * label;
struct d_partition * partition;
@@ -63,7 +66,12 @@ int osf_partition(struct parsed_partitions *state)
put_dev_sector(sect);
return 0;
}
- for (i = 0 ; i < le16_to_cpu(label->d_npartitions); i++, partition++) {
+ npartitions = le16_to_cpu(label->d_npartitions);
+ if (npartitions > MAX_OSF_PARTITIONS) {
+ put_dev_sector(sect);
+ return 0;
+ }
+ for (i = 0 ; i < npartitions; i++, partition++) {
if (slot == state->limit)
break;
if (le32_to_cpu(partition->p_size))
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9d096e82b20..d49c4b5d2c3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2620,35 +2620,6 @@ static const struct pid_entry proc_base_stuff[] = {
&proc_self_inode_operations, NULL, {}),
};
-/*
- * Exceptional case: normally we are not allowed to unhash a busy
- * directory. In this case, however, we can do it - no aliasing problems
- * due to the way we treat inodes.
- */
-static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
- struct inode *inode;
- struct task_struct *task;
-
- if (nd->flags & LOOKUP_RCU)
- return -ECHILD;
-
- inode = dentry->d_inode;
- task = get_proc_task(inode);
- if (task) {
- put_task_struct(task);
- return 1;
- }
- d_drop(dentry);
- return 0;
-}
-
-static const struct dentry_operations proc_base_dentry_operations =
-{
- .d_revalidate = proc_base_revalidate,
- .d_delete = pid_delete_dentry,
-};
-
static struct dentry *proc_base_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
@@ -2685,7 +2656,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
if (p->fop)
inode->i_fop = p->fop;
ei->op = p->op;
- d_set_d_op(dentry, &proc_base_dentry_operations);
d_add(dentry, inode);
error = NULL;
out:
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 176ce4cda68..d6a7ca1fdac 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -27,6 +27,7 @@
static void proc_evict_inode(struct inode *inode)
{
struct proc_dir_entry *de;
+ struct ctl_table_header *head;
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
@@ -38,8 +39,11 @@ static void proc_evict_inode(struct inode *inode)
de = PROC_I(inode)->pde;
if (de)
pde_put(de);
- if (PROC_I(inode)->sysctl)
- sysctl_head_put(PROC_I(inode)->sysctl);
+ head = PROC_I(inode)->sysctl;
+ if (head) {
+ rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
+ sysctl_head_put(head);
+ }
}
struct vfsmount *proc_mnt;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d9396a4fc7f..927cbd115e5 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -233,7 +233,7 @@ void __init proc_device_tree_init(void)
return;
root = of_find_node_by_path("/");
if (root == NULL) {
- printk(KERN_ERR "/proc/device-tree: can't find root\n");
+ pr_debug("/proc/device-tree: can't find root\n");
return;
}
proc_device_tree_add_node(root, proc_device_tree);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92a34e..f50133c11c2 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -32,7 +32,6 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
ei->sysctl_entry = table;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
- inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
inode->i_mode = table->mode;
if (!table->child) {
inode->i_mode |= S_IFREG;
@@ -408,15 +407,18 @@ static int proc_sys_compare(const struct dentry *parent,
const struct dentry *dentry, const struct inode *inode,
unsigned int len, const char *str, const struct qstr *name)
{
+ struct ctl_table_header *head;
/* Although proc doesn't have negative dentries, rcu-walk means
* that inode here can be NULL */
+ /* AV: can it, indeed? */
if (!inode)
- return 0;
+ return 1;
if (name->len != len)
return 1;
if (memcmp(name->name, str, len))
return 1;
- return !sysctl_is_seen(PROC_I(inode)->sysctl);
+ head = rcu_dereference(PROC_I(inode)->sysctl);
+ return !head || !sysctl_is_seen(head);
}
static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0bae036831e..1bba24bad82 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1593,8 +1593,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
struct inode *inode = dentry->d_inode;
int maxlen = *lenp;
- if (maxlen < 3)
+ if (need_parent && (maxlen < 5)) {
+ *lenp = 5;
return 255;
+ } else if (maxlen < 3) {
+ *lenp = 3;
+ return 255;
+ }
data[0] = inode->i_ino;
data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3eea859e699..c77514bd577 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2876,7 +2876,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
reiserfs_mounted_fs_count++;
if (reiserfs_mounted_fs_count <= 1) {
reiserfs_write_unlock(sb);
- commit_wq = create_workqueue("reiserfs");
+ commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
reiserfs_write_lock(sb);
}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ba5f51ec345..118662690cd 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -593,7 +593,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
new_inode_init(inode, dir, mode);
jbegin_count += reiserfs_cache_default_acl(dir);
- retval = reiserfs_security_init(dir, inode, &security);
+ retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
if (retval < 0) {
drop_new_inode(inode);
return retval;
@@ -667,7 +667,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
new_inode_init(inode, dir, mode);
jbegin_count += reiserfs_cache_default_acl(dir);
- retval = reiserfs_security_init(dir, inode, &security);
+ retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
if (retval < 0) {
drop_new_inode(inode);
return retval;
@@ -747,7 +747,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
new_inode_init(inode, dir, mode);
jbegin_count += reiserfs_cache_default_acl(dir);
- retval = reiserfs_security_init(dir, inode, &security);
+ retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
if (retval < 0) {
drop_new_inode(inode);
return retval;
@@ -771,7 +771,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
dentry, inode, &security);
if (retval) {
- dir->i_nlink--;
+ DEC_DIR_INODE_NLINK(dir)
goto out_failed;
}
@@ -1032,7 +1032,8 @@ static int reiserfs_symlink(struct inode *parent_dir,
}
new_inode_init(inode, parent_dir, mode);
- retval = reiserfs_security_init(parent_dir, inode, &security);
+ retval = reiserfs_security_init(parent_dir, inode, &dentry->d_name,
+ &security);
if (retval < 0) {
drop_new_inode(inode);
return retval;
@@ -1122,10 +1123,6 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
reiserfs_write_unlock(dir->i_sb);
return -EMLINK;
}
- if (inode->i_nlink == 0) {
- reiserfs_write_unlock(dir->i_sb);
- return -ENOENT;
- }
/* inc before scheduling so reiserfs_unlink knows we are here */
inc_nlink(inode);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 3cfb2e93364..5c11ca82b78 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -978,8 +978,6 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
- return -ECHILD;
return -EPERM;
}
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 237c6928d3c..ef66c18a933 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -54,6 +54,7 @@ static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
* of blocks needed for the transaction. If successful, reiserfs_security
* must be released using reiserfs_security_free when the caller is done. */
int reiserfs_security_init(struct inode *dir, struct inode *inode,
+ const struct qstr *qstr,
struct reiserfs_security_handle *sec)
{
int blocks = 0;
@@ -65,7 +66,7 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode,
if (IS_PRIVATE(dir))
return 0;
- error = security_inode_init_security(inode, dir, &sec->name,
+ error = security_inode_init_security(inode, dir, qstr, &sec->name,
&sec->value, &sec->length);
if (error) {
if (error == -EOPNOTSUPP)
diff --git a/fs/stat.c b/fs/stat.c
index d5c61cf2b70..961039121cb 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
int error = -EINVAL;
int lookup_flags = 0;
- if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0)
+ if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
+ AT_EMPTY_PATH)) != 0)
goto out;
if (!(flag & AT_SYMLINK_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
if (flag & AT_NO_AUTOMOUNT)
lookup_flags |= LOOKUP_NO_AUTOMOUNT;
+ if (flag & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
@@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
if (bufsiz <= 0)
return -EINVAL;
- error = user_path_at(dfd, pathname, 0, &path);
+ error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
if (!error) {
struct inode *inode = path.dentry->d_inode;
diff --git a/fs/statfs.c b/fs/statfs.c
index 30ea8c8a996..8244924dec5 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -73,149 +73,135 @@ int vfs_statfs(struct path *path, struct kstatfs *buf)
}
EXPORT_SYMBOL(vfs_statfs);
-static int do_statfs_native(struct path *path, struct statfs *buf)
+int user_statfs(const char __user *pathname, struct kstatfs *st)
{
- struct kstatfs st;
- int retval;
+ struct path path;
+ int error = user_path(pathname, &path);
+ if (!error) {
+ error = vfs_statfs(&path, st);
+ path_put(&path);
+ }
+ return error;
+}
- retval = vfs_statfs(path, &st);
- if (retval)
- return retval;
+int fd_statfs(int fd, struct kstatfs *st)
+{
+ struct file *file = fget(fd);
+ int error = -EBADF;
+ if (file) {
+ error = vfs_statfs(&file->f_path, st);
+ fput(file);
+ }
+ return error;
+}
- if (sizeof(*buf) == sizeof(st))
- memcpy(buf, &st, sizeof(st));
+static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
+{
+ struct statfs buf;
+
+ if (sizeof(buf) == sizeof(*st))
+ memcpy(&buf, st, sizeof(*st));
else {
- if (sizeof buf->f_blocks == 4) {
- if ((st.f_blocks | st.f_bfree | st.f_bavail |
- st.f_bsize | st.f_frsize) &
+ if (sizeof buf.f_blocks == 4) {
+ if ((st->f_blocks | st->f_bfree | st->f_bavail |
+ st->f_bsize | st->f_frsize) &
0xffffffff00000000ULL)
return -EOVERFLOW;
/*
* f_files and f_ffree may be -1; it's okay to stuff
* that into 32 bits
*/
- if (st.f_files != -1 &&
- (st.f_files & 0xffffffff00000000ULL))
+ if (st->f_files != -1 &&
+ (st->f_files & 0xffffffff00000000ULL))
return -EOVERFLOW;
- if (st.f_ffree != -1 &&
- (st.f_ffree & 0xffffffff00000000ULL))
+ if (st->f_ffree != -1 &&
+ (st->f_ffree & 0xffffffff00000000ULL))
return -EOVERFLOW;
}
- buf->f_type = st.f_type;
- buf->f_bsize = st.f_bsize;
- buf->f_blocks = st.f_blocks;
- buf->f_bfree = st.f_bfree;
- buf->f_bavail = st.f_bavail;
- buf->f_files = st.f_files;
- buf->f_ffree = st.f_ffree;
- buf->f_fsid = st.f_fsid;
- buf->f_namelen = st.f_namelen;
- buf->f_frsize = st.f_frsize;
- buf->f_flags = st.f_flags;
- memset(buf->f_spare, 0, sizeof(buf->f_spare));
+ buf.f_type = st->f_type;
+ buf.f_bsize = st->f_bsize;
+ buf.f_blocks = st->f_blocks;
+ buf.f_bfree = st->f_bfree;
+ buf.f_bavail = st->f_bavail;
+ buf.f_files = st->f_files;
+ buf.f_ffree = st->f_ffree;
+ buf.f_fsid = st->f_fsid;
+ buf.f_namelen = st->f_namelen;
+ buf.f_frsize = st->f_frsize;
+ buf.f_flags = st->f_flags;
+ memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
+ if (copy_to_user(p, &buf, sizeof(buf)))
+ return -EFAULT;
return 0;
}
-static int do_statfs64(struct path *path, struct statfs64 *buf)
+static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
{
- struct kstatfs st;
- int retval;
-
- retval = vfs_statfs(path, &st);
- if (retval)
- return retval;
-
- if (sizeof(*buf) == sizeof(st))
- memcpy(buf, &st, sizeof(st));
+ struct statfs64 buf;
+ if (sizeof(buf) == sizeof(*st))
+ memcpy(&buf, st, sizeof(*st));
else {
- buf->f_type = st.f_type;
- buf->f_bsize = st.f_bsize;
- buf->f_blocks = st.f_blocks;
- buf->f_bfree = st.f_bfree;
- buf->f_bavail = st.f_bavail;
- buf->f_files = st.f_files;
- buf->f_ffree = st.f_ffree;
- buf->f_fsid = st.f_fsid;
- buf->f_namelen = st.f_namelen;
- buf->f_frsize = st.f_frsize;
- buf->f_flags = st.f_flags;
- memset(buf->f_spare, 0, sizeof(buf->f_spare));
+ buf.f_type = st->f_type;
+ buf.f_bsize = st->f_bsize;
+ buf.f_blocks = st->f_blocks;
+ buf.f_bfree = st->f_bfree;
+ buf.f_bavail = st->f_bavail;
+ buf.f_files = st->f_files;
+ buf.f_ffree = st->f_ffree;
+ buf.f_fsid = st->f_fsid;
+ buf.f_namelen = st->f_namelen;
+ buf.f_frsize = st->f_frsize;
+ buf.f_flags = st->f_flags;
+ memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
+ if (copy_to_user(p, &buf, sizeof(buf)))
+ return -EFAULT;
return 0;
}
SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
{
- struct path path;
- int error;
-
- error = user_path(pathname, &path);
- if (!error) {
- struct statfs tmp;
- error = do_statfs_native(&path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- path_put(&path);
- }
+ struct kstatfs st;
+ int error = user_statfs(pathname, &st);
+ if (!error)
+ error = do_statfs_native(&st, buf);
return error;
}
SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
{
- struct path path;
- long error;
-
+ struct kstatfs st;
+ int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = user_path(pathname, &path);
- if (!error) {
- struct statfs64 tmp;
- error = do_statfs64(&path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- path_put(&path);
- }
+ error = user_statfs(pathname, &st);
+ if (!error)
+ error = do_statfs64(&st, buf);
return error;
}
SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
{
- struct file *file;
- struct statfs tmp;
- int error;
-
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = do_statfs_native(&file->f_path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- fput(file);
-out:
+ struct kstatfs st;
+ int error = fd_statfs(fd, &st);
+ if (!error)
+ error = do_statfs_native(&st, buf);
return error;
}
SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
{
- struct file *file;
- struct statfs64 tmp;
+ struct kstatfs st;
int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = do_statfs64(&file->f_path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- fput(file);
-out:
+ error = fd_statfs(fd, &st);
+ if (!error)
+ error = do_statfs64(&st, buf);
return error;
}
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b427b1208c2..e474fbcf8bd 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -245,7 +245,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
new_de = sysv_find_entry(new_dentry, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
sysv_set_link(new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -257,18 +256,15 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = sysv_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
sysv_delete_entry(old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
sysv_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 14f64b689d7..7217d67a80a 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
ubifs_assert(mutex_is_locked(&dir->i_mutex));
ubifs_assert(mutex_is_locked(&inode->i_mutex));
- /*
- * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
- * otherwise has the potential to corrupt the orphan inode list.
- *
- * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
- * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
- * lock 'dirA->i_mutex', so this is possible. Both of the functions
- * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
- * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
- * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
- * to the list of orphans. After this, 'vfs_link()' will link
- * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
- * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
- * to the list of orphans.
- */
- if (inode->i_nlink == 0)
- return -ENOENT;
-
err = dbg_check_synced_i_size(inode);
if (err)
return err;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2be0f9eb86d..f1dce848ef9 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,8 @@
#include <linux/crc-itu-t.h>
#include <linux/exportfs.h>
+enum { UDF_MAX_LINKS = 0xffff };
+
static inline int udf_match(int len1, const unsigned char *name1, int len2,
const unsigned char *name2)
{
@@ -650,7 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct udf_inode_info *iinfo;
err = -EMLINK;
- if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
+ if (dir->i_nlink >= UDF_MAX_LINKS)
goto out;
err = -EIO;
@@ -1034,9 +1036,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
struct fileIdentDesc cfi, *fi;
int err;
- if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
+ if (inode->i_nlink >= UDF_MAX_LINKS)
return -EMLINK;
- }
fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
if (!fi) {
@@ -1131,9 +1132,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
goto end_rename;
retval = -EMLINK;
- if (!new_inode &&
- new_dir->i_nlink >=
- (256 << sizeof(new_dir->i_nlink)) - 1)
+ if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
goto end_rename;
}
if (!nfi) {
@@ -1287,8 +1286,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
struct fid *fid = (struct fid *)fh;
int type = FILEID_UDF_WITHOUT_PARENT;
- if (len < 3 || (connectable && len < 5))
+ if (connectable && (len < 5)) {
+ *lenp = 5;
return 255;
+ } else if (len < 3) {
+ *lenp = 3;
+ return 255;
+ }
*lenp = 3;
fid->udf.block = location.logicalBlockNum;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 205030a707f..29309e25417 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -305,7 +305,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
ufs_set_link(new_dir, new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -317,12 +316,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= UFS_LINK_MAX)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = ufs_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
@@ -330,12 +326,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
/*
* Like most other Unix systems, set the ctime for inodes on a
* rename.
- * inode_dec_link_count() will mark the inode dirty.
*/
old_inode->i_ctime = CURRENT_TIME_SEC;
ufs_delete_entry(old_dir, old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
ufs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ac1c7e8378d..f83a4c830a6 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -2022,11 +2022,12 @@ xfs_buf_init(void)
if (!xfslogd_workqueue)
goto out_free_buf_zone;
- xfsdatad_workqueue = create_workqueue("xfsdatad");
+ xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
if (!xfsdatad_workqueue)
goto out_destroy_xfslogd_workqueue;
- xfsconvertd_workqueue = create_workqueue("xfsconvertd");
+ xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
+ WQ_MEM_RECLAIM, 1);
if (!xfsconvertd_workqueue)
goto out_destroy_xfsdatad_workqueue;
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 05201ae719e..d61611c8801 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -152,6 +152,8 @@ xfs_ioc_trim(
if (!capable(CAP_SYS_ADMIN))
return -XFS_ERROR(EPERM);
+ if (!blk_queue_discard(q))
+ return -XFS_ERROR(EOPNOTSUPP);
if (copy_from_user(&range, urange, sizeof(range)))
return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index fc0114da7fd..f4f878fc008 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -89,8 +89,10 @@ xfs_fs_encode_fh(
* seven combinations work. The real answer is "don't use v2".
*/
len = xfs_fileid_length(fileid_type);
- if (*max_len < len)
+ if (*max_len < len) {
+ *max_len = len;
return 255;
+ }
*max_len = len;
switch (fileid_type) {
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f5e2a19e0f8..0ca0e3c024d 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -695,14 +695,19 @@ xfs_ioc_fsgeometry_v1(
xfs_mount_t *mp,
void __user *arg)
{
- xfs_fsop_geom_v1_t fsgeo;
+ xfs_fsop_geom_t fsgeo;
int error;
- error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3);
+ error = xfs_fs_geometry(mp, &fsgeo, 3);
if (error)
return -error;
- if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
+ /*
+ * Caller should have passed an argument of type
+ * xfs_fsop_geom_v1_t. This is a proper subset of the
+ * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
+ */
+ if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
return -XFS_ERROR(EFAULT);
return 0;
}
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index bd5727852fd..9ff7fc603d2 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -102,7 +102,8 @@ xfs_mark_inode_dirty(
STATIC int
xfs_init_security(
struct inode *inode,
- struct inode *dir)
+ struct inode *dir,
+ const struct qstr *qstr)
{
struct xfs_inode *ip = XFS_I(inode);
size_t length;
@@ -110,7 +111,7 @@ xfs_init_security(
unsigned char *name;
int error;
- error = security_inode_init_security(inode, dir, (char **)&name,
+ error = security_inode_init_security(inode, dir, qstr, (char **)&name,
&value, &length);
if (error) {
if (error == -EOPNOTSUPP)
@@ -194,7 +195,7 @@ xfs_vn_mknod(
inode = VFS_I(ip);
- error = xfs_init_security(inode, dir);
+ error = xfs_init_security(inode, dir, &dentry->d_name);
if (unlikely(error))
goto out_cleanup_inode;
@@ -367,7 +368,7 @@ xfs_vn_symlink(
inode = VFS_I(cip);
- error = xfs_init_security(inode, dir);
+ error = xfs_init_security(inode, dir, &dentry->d_name);
if (unlikely(error))
goto out_cleanup_inode;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cec89dd5d7d..85668efb3e3 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -53,6 +53,9 @@ xfs_fs_geometry(
xfs_fsop_geom_t *geo,
int new_version)
{
+
+ memset(geo, 0, sizeof(*geo));
+
geo->blocksize = mp->m_sb.sb_blocksize;
geo->rtextsize = mp->m_sb.sb_rextsize;
geo->agblocks = mp->m_sb.sb_agblocks;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index edfa178bafb..4aff5639573 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -309,7 +309,7 @@ xfs_mru_cache_init(void)
if (!xfs_mru_elem_zone)
goto out;
- xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
+ xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);
if (!xfs_mru_reap_wq)
goto out_destroy_mru_elem_zone;