diff options
Diffstat (limited to 'fs')
244 files changed, 4461 insertions, 2847 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 296482fc77a..9ee5343d488 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -832,7 +832,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, * moved b under k and client parallely did a lookup for * k/b. */ - res = d_materialise_unique(dentry, inode); + res = d_splice_alias(inode, dentry); if (!res) v9fs_fid_add(dentry, fid); else if (!IS_ERR(res)) diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 02b64f4e576..6054c16b8fa 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -826,8 +826,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, struct dentry *dir_dentry; struct posix_acl *dacl = NULL, *pacl = NULL; - p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n", - dir->i_ino, dentry->d_name.name, omode, + p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n", + dir->i_ino, dentry, omode, MAJOR(rdev), MINOR(rdev)); if (!new_valid_dev(rdev)) diff --git a/fs/Makefile b/fs/Makefile index 34a1b9dea6d..da0bbb456d3 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -104,7 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ obj-$(CONFIG_AUTOFS4_FS) += autofs4/ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_FUSE_FS) += fuse/ -obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ +obj-$(CONFIG_OVERLAY_FS) += overlayfs/ obj-$(CONFIG_UDF_FS) += udf/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ obj-$(CONFIG_OMFS_FS) += omfs/ diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index abc853968fe..937ce8754b2 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -125,7 +125,7 @@ affs_fix_dcache(struct inode *inode, u32 entry_ino) { struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { if (entry_ino == (u32)(long)dentry->d_fsdata) { dentry->d_fsdata = (void *)inode->i_ino; break; diff --git a/fs/affs/inode.c b/fs/affs/inode.c index e217c511459..d0609a282e1 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -348,9 +348,9 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3 u32 block = 0; int retval; - pr_debug("%s(dir=%u, inode=%u, \"%*s\", type=%d)\n", + pr_debug("%s(dir=%u, inode=%u, \"%pd\", type=%d)\n", __func__, (u32)dir->i_ino, - (u32)inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name, type); + (u32)inode->i_ino, dentry, type); retval = -EIO; bh = affs_bread(sb, inode->i_ino); diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 035bd31556f..bbc38530e92 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -190,8 +190,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry) toupper_t toupper = affs_get_toupper(sb); u32 key; - pr_debug("%s(\"%.*s\")\n", - __func__, (int)dentry->d_name.len, dentry->d_name.name); + pr_debug("%s(\"%pd\")\n", __func__, dentry); bh = affs_bread(sb, dir->i_ino); if (!bh) @@ -219,8 +218,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) struct buffer_head *bh; struct inode *inode = NULL; - pr_debug("%s(\"%.*s\")\n", - __func__, (int)dentry->d_name.len, dentry->d_name.name); + pr_debug("%s(\"%pd\")\n", __func__, dentry); affs_lock_dir(dir); bh = affs_find_entry(dir, dentry); @@ -250,9 +248,9 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) int affs_unlink(struct inode *dir, struct dentry *dentry) { - pr_debug("%s(dir=%d, %lu \"%.*s\")\n", + pr_debug("%s(dir=%d, %lu \"%pd\")\n", __func__, (u32)dir->i_ino, dentry->d_inode->i_ino, - (int)dentry->d_name.len, dentry->d_name.name); + dentry); return affs_remove_header(dentry); } @@ -264,9 +262,8 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) struct inode *inode; int error; - pr_debug("%s(%lu,\"%.*s\",0%ho)\n", - __func__, dir->i_ino, (int)dentry->d_name.len, - dentry->d_name.name,mode); + pr_debug("%s(%lu,\"%pd\",0%ho)\n", + __func__, dir->i_ino, dentry, mode); inode = affs_new_inode(dir); if (!inode) @@ -294,9 +291,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; int error; - pr_debug("%s(%lu,\"%.*s\",0%ho)\n", - __func__, dir->i_ino, (int)dentry->d_name.len, - dentry->d_name.name, mode); + pr_debug("%s(%lu,\"%pd\",0%ho)\n", + __func__, dir->i_ino, dentry, mode); inode = affs_new_inode(dir); if (!inode) @@ -321,9 +317,9 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) int affs_rmdir(struct inode *dir, struct dentry *dentry) { - pr_debug("%s(dir=%u, %lu \"%.*s\")\n", + pr_debug("%s(dir=%u, %lu \"%pd\")\n", __func__, (u32)dir->i_ino, dentry->d_inode->i_ino, - (int)dentry->d_name.len, dentry->d_name.name); + dentry); return affs_remove_header(dentry); } @@ -338,9 +334,8 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) int i, maxlen, error; char c, lc; - pr_debug("%s(%lu,\"%.*s\" -> \"%s\")\n", - __func__, dir->i_ino, (int)dentry->d_name.len, - dentry->d_name.name, symname); + pr_debug("%s(%lu,\"%pd\" -> \"%s\")\n", + __func__, dir->i_ino, dentry, symname); maxlen = AFFS_SB(sb)->s_hashsize * sizeof(u32) - 1; inode = affs_new_inode(dir); @@ -409,9 +404,9 @@ affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; - pr_debug("%s(%u, %u, \"%.*s\")\n", + pr_debug("%s(%u, %u, \"%pd\")\n", __func__, (u32)inode->i_ino, (u32)dir->i_ino, - (int)dentry->d_name.len,dentry->d_name.name); + dentry); return affs_add_entry(dir, inode, dentry, ST_LINKFILE); } @@ -424,10 +419,9 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, struct buffer_head *bh = NULL; int retval; - pr_debug("%s(old=%u,\"%*s\" to new=%u,\"%*s\")\n", - __func__, (u32)old_dir->i_ino, (int)old_dentry->d_name.len, - old_dentry->d_name.name, (u32)new_dir->i_ino, - (int)new_dentry->d_name.len, new_dentry->d_name.name); + pr_debug("%s(old=%u,\"%pd\" to new=%u,\"%pd\")\n", + __func__, (u32)old_dir->i_ino, old_dentry, + (u32)new_dir->i_ino, new_dentry); retval = affs_check_name(new_dentry->d_name.name, new_dentry->d_name.len, diff --git a/fs/afs/dir.c b/fs/afs/dir.c index a1645b88fe8..4ec35e9130e 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -26,7 +26,7 @@ static int afs_readdir(struct file *file, struct dir_context *ctx); static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); static int afs_d_delete(const struct dentry *dentry); static void afs_d_release(struct dentry *dentry); -static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, +static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype); static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl); @@ -391,10 +391,11 @@ static int afs_readdir(struct file *file, struct dir_context *ctx) * - if afs_dir_iterate_block() spots this function, it'll pass the FID * uniquifier through dtype */ -static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, - loff_t fpos, u64 ino, unsigned dtype) +static int afs_lookup_filldir(struct dir_context *ctx, const char *name, + int nlen, loff_t fpos, u64 ino, unsigned dtype) { - struct afs_lookup_cookie *cookie = _cookie; + struct afs_lookup_cookie *cookie = + container_of(ctx, struct afs_lookup_cookie, ctx); _enter("{%s,%u},%s,%u,,%llu,%u", cookie->name.name, cookie->name.len, name, nlen, @@ -433,7 +434,7 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry, }; int ret; - _enter("{%lu},%p{%s},", dir->i_ino, dentry, dentry->d_name.name); + _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); /* search the directory */ ret = afs_dir_iterate(dir, &cookie.ctx, key); @@ -465,8 +466,8 @@ static struct inode *afs_try_auto_mntpt( struct afs_vnode *vnode = AFS_FS_I(dir); struct inode *inode; - _enter("%d, %p{%s}, {%x:%u}, %p", - ret, dentry, devname, vnode->fid.vid, vnode->fid.vnode, key); + _enter("%d, %p{%pd}, {%x:%u}, %p", + ret, dentry, dentry, vnode->fid.vid, vnode->fid.vnode, key); if (ret != -ENOENT || !test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) @@ -501,8 +502,8 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, vnode = AFS_FS_I(dir); - _enter("{%x:%u},%p{%s},", - vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name); + _enter("{%x:%u},%p{%pd},", + vnode->fid.vid, vnode->fid.vnode, dentry, dentry); ASSERTCMP(dentry->d_inode, ==, NULL); @@ -588,11 +589,11 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) vnode = AFS_FS_I(dentry->d_inode); if (dentry->d_inode) - _enter("{v={%x:%u} n=%s fl=%lx},", - vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name, + _enter("{v={%x:%u} n=%pd fl=%lx},", + vnode->fid.vid, vnode->fid.vnode, dentry, vnode->flags); else - _enter("{neg n=%s}", dentry->d_name.name); + _enter("{neg n=%pd}", dentry); key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell); if (IS_ERR(key)) @@ -607,7 +608,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) afs_validate(dir, key); if (test_bit(AFS_VNODE_DELETED, &dir->flags)) { - _debug("%s: parent dir deleted", dentry->d_name.name); + _debug("%pd: parent dir deleted", dentry); goto out_bad; } @@ -625,16 +626,16 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (!dentry->d_inode) goto out_bad; if (is_bad_inode(dentry->d_inode)) { - printk("kAFS: afs_d_revalidate: %s/%s has bad inode\n", - parent->d_name.name, dentry->d_name.name); + printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n", + dentry); goto out_bad; } /* if the vnode ID has changed, then the dirent points to a * different file */ if (fid.vnode != vnode->fid.vnode) { - _debug("%s: dirent changed [%u != %u]", - dentry->d_name.name, fid.vnode, + _debug("%pd: dirent changed [%u != %u]", + dentry, fid.vnode, vnode->fid.vnode); goto not_found; } @@ -643,8 +644,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) * been deleted and replaced, and the original vnode ID has * been reused */ if (fid.unique != vnode->fid.unique) { - _debug("%s: file deleted (uq %u -> %u I:%u)", - dentry->d_name.name, fid.unique, + _debug("%pd: file deleted (uq %u -> %u I:%u)", + dentry, fid.unique, vnode->fid.unique, dentry->d_inode->i_generation); spin_lock(&vnode->lock); @@ -656,14 +657,14 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) case -ENOENT: /* the filename is unknown */ - _debug("%s: dirent not found", dentry->d_name.name); + _debug("%pd: dirent not found", dentry); if (dentry->d_inode) goto not_found; goto out_valid; default: - _debug("failed to iterate dir %s: %d", - parent->d_name.name, ret); + _debug("failed to iterate dir %pd: %d", + parent, ret); goto out_bad; } @@ -681,8 +682,7 @@ not_found: spin_unlock(&dentry->d_lock); out_bad: - _debug("dropping dentry %s/%s", - parent->d_name.name, dentry->d_name.name); + _debug("dropping dentry %pd2", dentry); dput(parent); key_put(key); @@ -698,7 +698,7 @@ out_bad: */ static int afs_d_delete(const struct dentry *dentry) { - _enter("%s", dentry->d_name.name); + _enter("%pd", dentry); if (dentry->d_flags & DCACHE_NFSFS_RENAMED) goto zap; @@ -721,7 +721,7 @@ zap: */ static void afs_d_release(struct dentry *dentry) { - _enter("%s", dentry->d_name.name); + _enter("%pd", dentry); } /* @@ -740,8 +740,8 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%s},%ho", - dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); + _enter("{%x:%u},{%pd},%ho", + dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { @@ -801,8 +801,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%s}", - dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); + _enter("{%x:%u},{%pd}", + dvnode->fid.vid, dvnode->fid.vnode, dentry); key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { @@ -843,8 +843,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%s}", - dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); + _enter("{%x:%u},{%pd}", + dvnode->fid.vid, dvnode->fid.vnode, dentry); ret = -ENAMETOOLONG; if (dentry->d_name.len >= AFSNAMEMAX) @@ -917,8 +917,8 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%s},%ho,", - dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); + _enter("{%x:%u},{%pd},%ho,", + dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { @@ -980,10 +980,10 @@ static int afs_link(struct dentry *from, struct inode *dir, vnode = AFS_FS_I(from->d_inode); dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%x:%u},{%s}", + _enter("{%x:%u},{%x:%u},{%pd}", vnode->fid.vid, vnode->fid.vnode, dvnode->fid.vid, dvnode->fid.vnode, - dentry->d_name.name); + dentry); key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { @@ -1025,8 +1025,8 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%s},%s", - dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, + _enter("{%x:%u},{%pd},%s", + dvnode->fid.vid, dvnode->fid.vnode, dentry, content); ret = -EINVAL; @@ -1093,11 +1093,11 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, orig_dvnode = AFS_FS_I(old_dir); new_dvnode = AFS_FS_I(new_dir); - _enter("{%x:%u},{%x:%u},{%x:%u},{%s}", + _enter("{%x:%u},{%x:%u},{%x:%u},{%pd}", orig_dvnode->fid.vid, orig_dvnode->fid.vnode, vnode->fid.vid, vnode->fid.vnode, new_dvnode->fid.vid, new_dvnode->fid.vnode, - new_dentry->d_name.name); + new_dentry); key = afs_request_key(orig_dvnode->volume->cell); if (IS_ERR(key)) { diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 29467128844..8a1d38ef0fc 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -462,8 +462,8 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) struct key *key; int ret; - _enter("{%x:%u},{n=%s},%x", - vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name, + _enter("{%x:%u},{n=%pd},%x", + vnode->fid.vid, vnode->fid.vnode, dentry, attr->ia_valid); if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID | diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 9682c33d5da..938c5ab06d5 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -106,14 +106,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - _enter("%p,%p{%p{%s},%s}", - dir, - dentry, - dentry->d_parent, - dentry->d_parent ? - dentry->d_parent->d_name.name : (const unsigned char *) "", - dentry->d_name.name); - + _enter("%p,%p{%pd2}", dir, dentry, dentry); return ERR_PTR(-EREMOTE); } @@ -122,14 +115,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir, */ static int afs_mntpt_open(struct inode *inode, struct file *file) { - _enter("%p,%p{%p{%s},%s}", - inode, file, - file->f_path.dentry->d_parent, - file->f_path.dentry->d_parent ? - file->f_path.dentry->d_parent->d_name.name : - (const unsigned char *) "", - file->f_path.dentry->d_name.name); - + _enter("%p,%p{%pD2}", inode, file, file); return -EREMOTE; } @@ -146,7 +132,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) bool rwpath = false; int ret; - _enter("{%s}", mntpt->d_name.name); + _enter("{%pd}", mntpt); BUG_ON(!mntpt->d_inode); @@ -242,7 +228,7 @@ struct vfsmount *afs_d_automount(struct path *path) { struct vfsmount *newmnt; - _enter("{%s}", path->dentry->d_name.name); + _enter("{%pd}", path->dentry); newmnt = afs_mntpt_do_automount(path->dentry); if (IS_ERR(newmnt)) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 03a3beb1700..06e14bfb349 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -306,8 +306,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg, _debug("- range %u-%u%s", offset, to, msg->msg_flags ? " [more]" : ""); - msg->msg_iov = (struct iovec *) iov; - msg->msg_iovlen = 1; + iov_iter_init(&msg->msg_iter, WRITE, + (struct iovec *) iov, 1, to - offset); /* have to change the state *before* sending the last * packet as RxRPC might give us the reply before it @@ -384,8 +384,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, msg.msg_name = NULL; msg.msg_namelen = 0; - msg.msg_iov = (struct iovec *) iov; - msg.msg_iovlen = 1; + iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)iov, 1, + call->request_size); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = (call->send_pages ? MSG_MORE : 0); @@ -778,8 +778,7 @@ void afs_send_empty_reply(struct afs_call *call) iov[0].iov_len = 0; msg.msg_name = NULL; msg.msg_namelen = 0; - msg.msg_iov = iov; - msg.msg_iovlen = 0; + iov_iter_init(&msg.msg_iter, WRITE, iov, 0, 0); /* WTF? */ msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -815,8 +814,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) iov[0].iov_len = len; msg.msg_name = NULL; msg.msg_namelen = 0; - msg.msg_iov = iov; - msg.msg_iovlen = 1; + iov_iter_init(&msg.msg_iter, WRITE, iov, 1, len); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; diff --git a/fs/afs/write.c b/fs/afs/write.c index ab6adfd5251..c13cb08964e 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -682,14 +682,13 @@ int afs_writeback_all(struct afs_vnode *vnode) */ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = file->f_mapping->host; + struct inode *inode = file_inode(file); struct afs_writeback *wb, *xwb; - struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); + struct afs_vnode *vnode = AFS_FS_I(inode); int ret; - _enter("{%x:%u},{n=%s},%d", - vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name, + _enter("{%x:%u},{n=%pD},%d", + vnode->fid.vid, vnode->fid.vnode, file, datasync); ret = filemap_write_and_wait_range(inode->i_mapping, start, end); @@ -165,6 +165,15 @@ static struct vfsmount *aio_mnt; static const struct file_operations aio_ring_fops; static const struct address_space_operations aio_ctx_aops; +/* Backing dev info for aio fs. + * -no dirty page accounting or writeback happens + */ +static struct backing_dev_info aio_fs_backing_dev_info = { + .name = "aiofs", + .state = 0, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_MAP_COPY, +}; + static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) { struct qstr this = QSTR_INIT("[aio]", 5); @@ -176,6 +185,7 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) inode->i_mapping->a_ops = &aio_ctx_aops; inode->i_mapping->private_data = ctx; + inode->i_mapping->backing_dev_info = &aio_fs_backing_dev_info; inode->i_size = PAGE_SIZE * nr_pages; path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); @@ -220,6 +230,9 @@ static int __init aio_setup(void) if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); + if (bdi_init(&aio_fs_backing_dev_info)) + panic("Failed to init aio fs backing dev info."); + kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); @@ -281,11 +294,6 @@ static const struct file_operations aio_ring_fops = { .mmap = aio_ring_mmap, }; -static int aio_set_page_dirty(struct page *page) -{ - return 0; -} - #if IS_ENABLED(CONFIG_MIGRATION) static int aio_migratepage(struct address_space *mapping, struct page *new, struct page *old, enum migrate_mode mode) @@ -357,7 +365,7 @@ out: #endif static const struct address_space_operations aio_ctx_aops = { - .set_page_dirty = aio_set_page_dirty, + .set_page_dirty = __set_page_dirty_no_writeback, #if IS_ENABLED(CONFIG_MIGRATION) .migratepage = aio_migratepage, #endif @@ -412,7 +420,6 @@ static int aio_setup_ring(struct kioctx *ctx) pr_debug("pid(%d) page[%d]->count=%d\n", current->pid, i, page_count(page)); SetPageUptodate(page); - SetPageDirty(page); unlock_page(page); ctx->ring_pages[i] = page; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 683a5b9ce22..bfdbaba9c2b 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -41,8 +41,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) struct path path = {.mnt = mnt, .dentry = dentry}; int status = 1; - DPRINTK("dentry %p %.*s", - dentry, (int)dentry->d_name.len, dentry->d_name.name); + DPRINTK("dentry %p %pd", dentry, dentry); path_get(&path); @@ -85,7 +84,7 @@ static struct dentry *get_next_positive_subdir(struct dentry *prev, spin_lock(&root->d_lock); if (prev) - next = prev->d_u.d_child.next; + next = prev->d_child.next; else { prev = dget_dlock(root); next = prev->d_subdirs.next; @@ -99,13 +98,13 @@ cont: return NULL; } - q = list_entry(next, struct dentry, d_u.d_child); + q = list_entry(next, struct dentry, d_child); spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED); /* Already gone or negative dentry (under construction) - try next */ if (!d_count(q) || !simple_positive(q)) { spin_unlock(&q->d_lock); - next = q->d_u.d_child.next; + next = q->d_child.next; goto cont; } dget_dlock(q); @@ -155,13 +154,13 @@ again: goto relock; } spin_unlock(&p->d_lock); - next = p->d_u.d_child.next; + next = p->d_child.next; p = parent; if (next != &parent->d_subdirs) break; } } - ret = list_entry(next, struct dentry, d_u.d_child); + ret = list_entry(next, struct dentry, d_child); spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED); /* Negative dentry - try next */ @@ -192,8 +191,7 @@ static int autofs4_direct_busy(struct vfsmount *mnt, unsigned long timeout, int do_now) { - DPRINTK("top %p %.*s", - top, (int) top->d_name.len, top->d_name.name); + DPRINTK("top %p %pd", top, top); /* If it's busy update the expiry counters */ if (!may_umount_tree(mnt)) { @@ -221,8 +219,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, struct autofs_info *top_ino = autofs4_dentry_ino(top); struct dentry *p; - DPRINTK("top %p %.*s", - top, (int)top->d_name.len, top->d_name.name); + DPRINTK("top %p %pd", top, top); /* Negative dentry - give up */ if (!simple_positive(top)) @@ -230,8 +227,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, p = NULL; while ((p = get_next_positive_dentry(p, top))) { - DPRINTK("dentry %p %.*s", - p, (int) p->d_name.len, p->d_name.name); + DPRINTK("dentry %p %pd", p, p); /* * Is someone visiting anywhere in the subtree ? @@ -277,13 +273,11 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, { struct dentry *p; - DPRINTK("parent %p %.*s", - parent, (int)parent->d_name.len, parent->d_name.name); + DPRINTK("parent %p %pd", parent, parent); p = NULL; while ((p = get_next_positive_dentry(p, parent))) { - DPRINTK("dentry %p %.*s", - p, (int) p->d_name.len, p->d_name.name); + DPRINTK("dentry %p %pd", p, p); if (d_mountpoint(p)) { /* Can we umount this guy */ @@ -368,8 +362,7 @@ static struct dentry *should_expire(struct dentry *dentry, * offset (autofs-5.0+). */ if (d_mountpoint(dentry)) { - DPRINTK("checking mountpoint %p %.*s", - dentry, (int)dentry->d_name.len, dentry->d_name.name); + DPRINTK("checking mountpoint %p %pd", dentry, dentry); /* Can we umount this guy */ if (autofs4_mount_busy(mnt, dentry)) @@ -382,8 +375,7 @@ static struct dentry *should_expire(struct dentry *dentry, } if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) { - DPRINTK("checking symlink %p %.*s", - dentry, (int)dentry->d_name.len, dentry->d_name.name); + DPRINTK("checking symlink %p %pd", dentry, dentry); /* * A symlink can't be "busy" in the usual sense so * just check last used for expire timeout. @@ -479,8 +471,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, return NULL; found: - DPRINTK("returning %p %.*s", - expired, (int)expired->d_name.len, expired->d_name.name); + DPRINTK("returning %p %pd", expired, expired); ino->flags |= AUTOFS_INF_EXPIRING; smp_mb(); ino->flags &= ~AUTOFS_INF_NO_RCU; @@ -489,7 +480,7 @@ found: spin_lock(&sbi->lookup_lock); spin_lock(&expired->d_parent->d_lock); spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); + list_move(&expired->d_parent->d_subdirs, &expired->d_child); spin_unlock(&expired->d_lock); spin_unlock(&expired->d_parent->d_lock); spin_unlock(&sbi->lookup_lock); @@ -512,8 +503,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) if (ino->flags & AUTOFS_INF_EXPIRING) { spin_unlock(&sbi->fs_lock); - DPRINTK("waiting for expire %p name=%.*s", - dentry, dentry->d_name.len, dentry->d_name.name); + DPRINTK("waiting for expire %p name=%pd", dentry, dentry); status = autofs4_wait(sbi, dentry, NFY_NONE); wait_for_completion(&ino->expire_complete); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index d76d083f2f0..dbb5b7212ce 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -108,8 +108,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) struct dentry *dentry = file->f_path.dentry; struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); - DPRINTK("file=%p dentry=%p %.*s", - file, dentry, dentry->d_name.len, dentry->d_name.name); + DPRINTK("file=%p dentry=%p %pD", file, dentry, dentry); if (autofs4_oz_mode(sbi)) goto out; @@ -279,8 +278,7 @@ static int autofs4_mount_wait(struct dentry *dentry, bool rcu_walk) if (ino->flags & AUTOFS_INF_PENDING) { if (rcu_walk) return -ECHILD; - DPRINTK("waiting for mount name=%.*s", - dentry->d_name.len, dentry->d_name.name); + DPRINTK("waiting for mount name=%pd", dentry); status = autofs4_wait(sbi, dentry, NFY_MOUNT); DPRINTK("mount wait done status=%d", status); } @@ -340,8 +338,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path) struct autofs_info *ino = autofs4_dentry_ino(dentry); int status; - DPRINTK("dentry=%p %.*s", - dentry, dentry->d_name.len, dentry->d_name.name); + DPRINTK("dentry=%p %pd", dentry, dentry); /* The daemon never triggers a mount. */ if (autofs4_oz_mode(sbi)) @@ -428,8 +425,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) struct autofs_info *ino = autofs4_dentry_ino(dentry); int status; - DPRINTK("dentry=%p %.*s", - dentry, dentry->d_name.len, dentry->d_name.name); + DPRINTK("dentry=%p %pd", dentry, dentry); /* The daemon never waits. */ if (autofs4_oz_mode(sbi)) { @@ -504,7 +500,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u struct autofs_info *ino; struct dentry *active; - DPRINTK("name = %.*s", dentry->d_name.len, dentry->d_name.name); + DPRINTK("name = %pd", dentry); /* File name too long to exist */ if (dentry->d_name.len > NAME_MAX) @@ -558,8 +554,7 @@ static int autofs4_dir_symlink(struct inode *dir, size_t size = strlen(symname); char *cp; - DPRINTK("%s <- %.*s", symname, - dentry->d_name.len, dentry->d_name.name); + DPRINTK("%s <- %pd", symname, dentry); if (!autofs4_oz_mode(sbi)) return -EACCES; @@ -687,7 +682,7 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry) /* only consider parents below dentrys in the root */ if (IS_ROOT(parent->d_parent)) return; - d_child = &dentry->d_u.d_child; + d_child = &dentry->d_child; /* Set parent managed if it's becoming empty */ if (d_child->next == &parent->d_subdirs && d_child->prev == &parent->d_subdirs) @@ -701,8 +696,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) struct autofs_info *ino = autofs4_dentry_ino(dentry); struct autofs_info *p_ino; - DPRINTK("dentry %p, removing %.*s", - dentry, dentry->d_name.len, dentry->d_name.name); + DPRINTK("dentry %p, removing %pd", dentry, dentry); if (!autofs4_oz_mode(sbi)) return -EACCES; @@ -744,8 +738,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m if (!autofs4_oz_mode(sbi)) return -EACCES; - DPRINTK("dentry %p, creating %.*s", - dentry, dentry->d_name.len, dentry->d_name.name); + DPRINTK("dentry %p, creating %pd", dentry, dentry); BUG_ON(!ino); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 4cf61ec6b7a..b94d1cc9cd3 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -172,8 +172,8 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) char *utfname; const char *name = dentry->d_name.name; - befs_debug(sb, "---> %s name %s inode %ld", __func__, - dentry->d_name.name, dir->i_ino); + befs_debug(sb, "---> %s name %pd inode %ld", __func__, + dentry, dir->i_ino); /* Convert to UTF-8 */ if (BEFS_SB(sb)->nls) { @@ -191,8 +191,7 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) } if (ret == BEFS_BT_NOT_FOUND) { - befs_debug(sb, "<--- %s %s not found", __func__, - dentry->d_name.name); + befs_debug(sb, "<--- %s %pd not found", __func__, dentry); return ERR_PTR(-ENOENT); } else if (ret != BEFS_OK || offset == 0) { @@ -222,10 +221,9 @@ befs_readdir(struct file *file, struct dir_context *ctx) size_t keysize; unsigned char d_type; char keybuf[BEFS_NAME_LEN + 1]; - const char *dirname = file->f_path.dentry->d_name.name; - befs_debug(sb, "---> %s name %s, inode %ld, ctx->pos %lld", - __func__, dirname, inode->i_ino, ctx->pos); + befs_debug(sb, "---> %s name %pD, inode %ld, ctx->pos %lld", + __func__, file, inode->i_ino, ctx->pos); more: result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1, @@ -233,8 +231,8 @@ more: if (result == BEFS_ERR) { befs_debug(sb, "<--- %s ERROR", __func__); - befs_error(sb, "IO error reading %s (inode %lu)", - dirname, inode->i_ino); + befs_error(sb, "IO error reading %pD (inode %lu)", + file, inode->i_ino); return -EIO; } else if (result == BEFS_BT_END) { diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 929dec08c34..4c556680fa7 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -292,8 +292,8 @@ static int load_aout_binary(struct linux_binprm * bprm) if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit()) { printk(KERN_WARNING - "fd_offset is not page aligned. Please convert program: %s\n", - bprm->file->f_path.dentry->d_name.name); + "fd_offset is not page aligned. Please convert program: %pD\n", + bprm->file); } if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { @@ -375,8 +375,8 @@ static int load_aout_library(struct file *file) if (printk_ratelimit()) { printk(KERN_WARNING - "N_TXTOFF is not page aligned. Please convert library: %s\n", - file->f_path.dentry->d_name.name); + "N_TXTOFF is not page aligned. Please convert library: %pD\n", + file); } vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d8fc0605b9d..3a6175fe10c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1994,18 +1994,6 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, shdr4extnum->sh_info = segs; } -static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma, - unsigned long mm_flags) -{ - struct vm_area_struct *vma; - size_t size = 0; - - for (vma = first_vma(current, gate_vma); vma != NULL; - vma = next_vma(vma, gate_vma)) - size += vma_dump_size(vma, mm_flags); - return size; -} - /* * Actual dumper * @@ -2017,7 +2005,8 @@ static int elf_core_dump(struct coredump_params *cprm) { int has_dumped = 0; mm_segment_t fs; - int segs; + int segs, i; + size_t vma_data_size = 0; struct vm_area_struct *vma, *gate_vma; struct elfhdr *elf = NULL; loff_t offset = 0, dataoff; @@ -2026,6 +2015,7 @@ static int elf_core_dump(struct coredump_params *cprm) struct elf_shdr *shdr4extnum = NULL; Elf_Half e_phnum; elf_addr_t e_shoff; + elf_addr_t *vma_filesz = NULL; /* * We no longer stop all VM operations. @@ -2093,7 +2083,20 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); - offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags); + vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL); + if (!vma_filesz) + goto end_coredump; + + for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; + vma = next_vma(vma, gate_vma)) { + unsigned long dump_size; + + dump_size = vma_dump_size(vma, cprm->mm_flags); + vma_filesz[i++] = dump_size; + vma_data_size += dump_size; + } + + offset += vma_data_size; offset += elf_core_extra_data_size(); e_shoff = offset; @@ -2113,7 +2116,7 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; /* Write program headers for segments dump */ - for (vma = first_vma(current, gate_vma); vma != NULL; + for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; vma = next_vma(vma, gate_vma)) { struct elf_phdr phdr; @@ -2121,7 +2124,7 @@ static int elf_core_dump(struct coredump_params *cprm) phdr.p_offset = offset; phdr.p_vaddr = vma->vm_start; phdr.p_paddr = 0; - phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags); + phdr.p_filesz = vma_filesz[i++]; phdr.p_memsz = vma->vm_end - vma->vm_start; offset += phdr.p_filesz; phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0; @@ -2149,12 +2152,12 @@ static int elf_core_dump(struct coredump_params *cprm) if (!dump_skip(cprm, dataoff - cprm->written)) goto end_coredump; - for (vma = first_vma(current, gate_vma); vma != NULL; + for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; vma = next_vma(vma, gate_vma)) { unsigned long addr; unsigned long end; - end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags); + end = vma->vm_start + vma_filesz[i++]; for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { struct page *page; @@ -2187,6 +2190,7 @@ end_coredump: cleanup: free_note_info(&info); kfree(shdr4extnum); + kfree(vma_filesz); kfree(phdr4note); kfree(elf); out: diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index fd8beb9657a..70789e198de 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -1,21 +1,14 @@ /* - * binfmt_misc.c + * binfmt_misc.c * - * Copyright (C) 1997 Richard Günther + * Copyright (C) 1997 Richard Günther * - * binfmt_misc detects binaries via a magic or filename extension and invokes - * a specified wrapper. This should obsolete binfmt_java, binfmt_em86 and - * binfmt_mz. - * - * 1997-04-25 first version - * [...] - * 1997-05-19 cleanup - * 1997-06-26 hpa: pass the real filename rather than argv[0] - * 1997-06-30 minor cleanup - * 1997-08-09 removed extension stripping, locking cleanup - * 2001-02-28 AV: rewritten into something that resembles C. Original didn't. + * binfmt_misc detects binaries via a magic or filename extension and invokes + * a specified wrapper. See Documentation/binfmt_misc.txt for more details. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/init.h> #include <linux/sched.h> @@ -30,8 +23,13 @@ #include <linux/mount.h> #include <linux/syscalls.h> #include <linux/fs.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> +#ifdef DEBUG +# define USE_DEBUG 1 +#else +# define USE_DEBUG 0 +#endif enum { VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */ @@ -41,9 +39,9 @@ static LIST_HEAD(entries); static int enabled = 1; enum {Enabled, Magic}; -#define MISC_FMT_PRESERVE_ARGV0 (1<<31) -#define MISC_FMT_OPEN_BINARY (1<<30) -#define MISC_FMT_CREDENTIALS (1<<29) +#define MISC_FMT_PRESERVE_ARGV0 (1 << 31) +#define MISC_FMT_OPEN_BINARY (1 << 30) +#define MISC_FMT_CREDENTIALS (1 << 29) typedef struct { struct list_head list; @@ -87,20 +85,24 @@ static Node *check_file(struct linux_binprm *bprm) char *p = strrchr(bprm->interp, '.'); struct list_head *l; + /* Walk all the registered handlers. */ list_for_each(l, &entries) { Node *e = list_entry(l, Node, list); char *s; int j; + /* Make sure this one is currently enabled. */ if (!test_bit(Enabled, &e->flags)) continue; + /* Do matching based on extension if applicable. */ if (!test_bit(Magic, &e->flags)) { if (p && !strcmp(e->magic, p + 1)) return e; continue; } + /* Do matching based on magic & mask. */ s = bprm->buf + e->offset; if (e->mask) { for (j = 0; j < e->size; j++) @@ -123,7 +125,7 @@ static Node *check_file(struct linux_binprm *bprm) static int load_misc_binary(struct linux_binprm *bprm) { Node *fmt; - struct file * interp_file = NULL; + struct file *interp_file = NULL; char iname[BINPRM_BUF_SIZE]; const char *iname_addr = iname; int retval; @@ -131,7 +133,7 @@ static int load_misc_binary(struct linux_binprm *bprm) retval = -ENOEXEC; if (!enabled) - goto _ret; + goto ret; /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); @@ -140,25 +142,26 @@ static int load_misc_binary(struct linux_binprm *bprm) strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE); read_unlock(&entries_lock); if (!fmt) - goto _ret; + goto ret; if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) { retval = remove_arg_zero(bprm); if (retval) - goto _ret; + goto ret; } if (fmt->flags & MISC_FMT_OPEN_BINARY) { /* if the binary should be opened on behalf of the * interpreter than keep it open and assign descriptor - * to it */ - fd_binary = get_unused_fd(); - if (fd_binary < 0) { - retval = fd_binary; - goto _ret; - } - fd_install(fd_binary, bprm->file); + * to it + */ + fd_binary = get_unused_fd_flags(0); + if (fd_binary < 0) { + retval = fd_binary; + goto ret; + } + fd_install(fd_binary, bprm->file); /* if the binary is not readable than enforce mm->dumpable=0 regardless of the interpreter's permissions */ @@ -171,32 +174,32 @@ static int load_misc_binary(struct linux_binprm *bprm) bprm->interp_flags |= BINPRM_FLAGS_EXECFD; bprm->interp_data = fd_binary; - } else { - allow_write_access(bprm->file); - fput(bprm->file); - bprm->file = NULL; - } + } else { + allow_write_access(bprm->file); + fput(bprm->file); + bprm->file = NULL; + } /* make argv[1] be the path to the binary */ - retval = copy_strings_kernel (1, &bprm->interp, bprm); + retval = copy_strings_kernel(1, &bprm->interp, bprm); if (retval < 0) - goto _error; + goto error; bprm->argc++; /* add the interp as argv[0] */ - retval = copy_strings_kernel (1, &iname_addr, bprm); + retval = copy_strings_kernel(1, &iname_addr, bprm); if (retval < 0) - goto _error; - bprm->argc ++; + goto error; + bprm->argc++; /* Update interp in case binfmt_script needs it. */ retval = bprm_change_interp(iname, bprm); if (retval < 0) - goto _error; + goto error; - interp_file = open_exec (iname); - retval = PTR_ERR (interp_file); - if (IS_ERR (interp_file)) - goto _error; + interp_file = open_exec(iname); + retval = PTR_ERR(interp_file); + if (IS_ERR(interp_file)) + goto error; bprm->file = interp_file; if (fmt->flags & MISC_FMT_CREDENTIALS) { @@ -207,23 +210,23 @@ static int load_misc_binary(struct linux_binprm *bprm) memset(bprm->buf, 0, BINPRM_BUF_SIZE); retval = kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE); } else - retval = prepare_binprm (bprm); + retval = prepare_binprm(bprm); if (retval < 0) - goto _error; + goto error; retval = search_binary_handler(bprm); if (retval < 0) - goto _error; + goto error; -_ret: +ret: return retval; -_error: +error: if (fd_binary > 0) sys_close(fd_binary); bprm->interp_flags = 0; bprm->interp_data = 0; - goto _ret; + goto ret; } /* Command parsers */ @@ -250,36 +253,40 @@ static char *scanarg(char *s, char del) return s; } -static char * check_special_flags (char * sfs, Node * e) +static char *check_special_flags(char *sfs, Node *e) { - char * p = sfs; + char *p = sfs; int cont = 1; /* special flags */ while (cont) { switch (*p) { - case 'P': - p++; - e->flags |= MISC_FMT_PRESERVE_ARGV0; - break; - case 'O': - p++; - e->flags |= MISC_FMT_OPEN_BINARY; - break; - case 'C': - p++; - /* this flags also implies the - open-binary flag */ - e->flags |= (MISC_FMT_CREDENTIALS | - MISC_FMT_OPEN_BINARY); - break; - default: - cont = 0; + case 'P': + pr_debug("register: flag: P (preserve argv0)\n"); + p++; + e->flags |= MISC_FMT_PRESERVE_ARGV0; + break; + case 'O': + pr_debug("register: flag: O (open binary)\n"); + p++; + e->flags |= MISC_FMT_OPEN_BINARY; + break; + case 'C': + pr_debug("register: flag: C (preserve creds)\n"); + p++; + /* this flags also implies the + open-binary flag */ + e->flags |= (MISC_FMT_CREDENTIALS | + MISC_FMT_OPEN_BINARY); + break; + default: + cont = 0; } } return p; } + /* * This registers a new binary format, it recognises the syntax * ':name:type:offset:magic:mask:interpreter:flags' @@ -292,6 +299,8 @@ static Node *create_entry(const char __user *buffer, size_t count) char *buf, *p; char del; + pr_debug("register: received %zu bytes\n", count); + /* some sanity checks */ err = -EINVAL; if ((count < 11) || (count > MAX_REGISTER_LENGTH)) @@ -299,7 +308,7 @@ static Node *create_entry(const char __user *buffer, size_t count) err = -ENOMEM; memsize = sizeof(Node) + count + 8; - e = kmalloc(memsize, GFP_USER); + e = kmalloc(memsize, GFP_KERNEL); if (!e) goto out; @@ -307,98 +316,175 @@ static Node *create_entry(const char __user *buffer, size_t count) memset(e, 0, sizeof(Node)); if (copy_from_user(buf, buffer, count)) - goto Efault; + goto efault; del = *p++; /* delimeter */ - memset(buf+count, del, 8); + pr_debug("register: delim: %#x {%c}\n", del, del); + + /* Pad the buffer with the delim to simplify parsing below. */ + memset(buf + count, del, 8); + /* Parse the 'name' field. */ e->name = p; p = strchr(p, del); if (!p) - goto Einval; + goto einval; *p++ = '\0'; if (!e->name[0] || !strcmp(e->name, ".") || !strcmp(e->name, "..") || strchr(e->name, '/')) - goto Einval; + goto einval; + + pr_debug("register: name: {%s}\n", e->name); + + /* Parse the 'type' field. */ switch (*p++) { - case 'E': e->flags = 1<<Enabled; break; - case 'M': e->flags = (1<<Enabled) | (1<<Magic); break; - default: goto Einval; + case 'E': + pr_debug("register: type: E (extension)\n"); + e->flags = 1 << Enabled; + break; + case 'M': + pr_debug("register: type: M (magic)\n"); + e->flags = (1 << Enabled) | (1 << Magic); + break; + default: + goto einval; } if (*p++ != del) - goto Einval; + goto einval; + if (test_bit(Magic, &e->flags)) { - char *s = strchr(p, del); + /* Handle the 'M' (magic) format. */ + char *s; + + /* Parse the 'offset' field. */ + s = strchr(p, del); if (!s) - goto Einval; + goto einval; *s++ = '\0'; e->offset = simple_strtoul(p, &p, 10); if (*p++) - goto Einval; + goto einval; + pr_debug("register: offset: %#x\n", e->offset); + + /* Parse the 'magic' field. */ e->magic = p; p = scanarg(p, del); if (!p) - goto Einval; + goto einval; p[-1] = '\0'; - if (!e->magic[0]) - goto Einval; + if (p == e->magic) + goto einval; + if (USE_DEBUG) + print_hex_dump_bytes( + KBUILD_MODNAME ": register: magic[raw]: ", + DUMP_PREFIX_NONE, e->magic, p - e->magic); + + /* Parse the 'mask' field. */ e->mask = p; p = scanarg(p, del); if (!p) - goto Einval; + goto einval; p[-1] = '\0'; - if (!e->mask[0]) + if (p == e->mask) { e->mask = NULL; + pr_debug("register: mask[raw]: none\n"); + } else if (USE_DEBUG) + print_hex_dump_bytes( + KBUILD_MODNAME ": register: mask[raw]: ", + DUMP_PREFIX_NONE, e->mask, p - e->mask); + + /* + * Decode the magic & mask fields. + * Note: while we might have accepted embedded NUL bytes from + * above, the unescape helpers here will stop at the first one + * it encounters. + */ e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX); if (e->mask && string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size) - goto Einval; + goto einval; if (e->size + e->offset > BINPRM_BUF_SIZE) - goto Einval; + goto einval; + pr_debug("register: magic/mask length: %i\n", e->size); + if (USE_DEBUG) { + print_hex_dump_bytes( + KBUILD_MODNAME ": register: magic[decoded]: ", + DUMP_PREFIX_NONE, e->magic, e->size); + + if (e->mask) { + int i; + char *masked = kmalloc(e->size, GFP_KERNEL); + + print_hex_dump_bytes( + KBUILD_MODNAME ": register: mask[decoded]: ", + DUMP_PREFIX_NONE, e->mask, e->size); + + if (masked) { + for (i = 0; i < e->size; ++i) + masked[i] = e->magic[i] & e->mask[i]; + print_hex_dump_bytes( + KBUILD_MODNAME ": register: magic[masked]: ", + DUMP_PREFIX_NONE, masked, e->size); + + kfree(masked); + } + } + } } else { + /* Handle the 'E' (extension) format. */ + + /* Skip the 'offset' field. */ p = strchr(p, del); if (!p) - goto Einval; + goto einval; *p++ = '\0'; + + /* Parse the 'magic' field. */ e->magic = p; p = strchr(p, del); if (!p) - goto Einval; + goto einval; *p++ = '\0'; if (!e->magic[0] || strchr(e->magic, '/')) - goto Einval; + goto einval; + pr_debug("register: extension: {%s}\n", e->magic); + + /* Skip the 'mask' field. */ p = strchr(p, del); if (!p) - goto Einval; + goto einval; *p++ = '\0'; } + + /* Parse the 'interpreter' field. */ e->interpreter = p; p = strchr(p, del); if (!p) - goto Einval; + goto einval; *p++ = '\0'; if (!e->interpreter[0]) - goto Einval; - - - p = check_special_flags (p, e); + goto einval; + pr_debug("register: interpreter: {%s}\n", e->interpreter); + /* Parse the 'flags' field. */ + p = check_special_flags(p, e); if (*p == '\n') p++; if (p != buf + count) - goto Einval; + goto einval; + return e; out: return ERR_PTR(err); -Efault: +efault: kfree(e); return ERR_PTR(-EFAULT); -Einval: +einval: kfree(e); return ERR_PTR(-EINVAL); } @@ -417,7 +503,7 @@ static int parse_command(const char __user *buffer, size_t count) return -EFAULT; if (!count) return 0; - if (s[count-1] == '\n') + if (s[count - 1] == '\n') count--; if (count == 1 && s[0] == '0') return 1; @@ -434,7 +520,7 @@ static void entry_status(Node *e, char *page) { char *dp; char *status = "disabled"; - const char * flags = "flags: "; + const char *flags = "flags: "; if (test_bit(Enabled, &e->flags)) status = "enabled"; @@ -448,19 +534,15 @@ static void entry_status(Node *e, char *page) dp = page + strlen(page); /* print the special flags */ - sprintf (dp, "%s", flags); - dp += strlen (flags); - if (e->flags & MISC_FMT_PRESERVE_ARGV0) { - *dp ++ = 'P'; - } - if (e->flags & MISC_FMT_OPEN_BINARY) { - *dp ++ = 'O'; - } - if (e->flags & MISC_FMT_CREDENTIALS) { - *dp ++ = 'C'; - } - *dp ++ = '\n'; - + sprintf(dp, "%s", flags); + dp += strlen(flags); + if (e->flags & MISC_FMT_PRESERVE_ARGV0) + *dp++ = 'P'; + if (e->flags & MISC_FMT_OPEN_BINARY) + *dp++ = 'O'; + if (e->flags & MISC_FMT_CREDENTIALS) + *dp++ = 'C'; + *dp++ = '\n'; if (!test_bit(Magic, &e->flags)) { sprintf(dp, "extension .%s\n", e->magic); @@ -488,7 +570,7 @@ static void entry_status(Node *e, char *page) static struct inode *bm_get_inode(struct super_block *sb, int mode) { - struct inode * inode = new_inode(sb); + struct inode *inode = new_inode(sb); if (inode) { inode->i_ino = get_next_ino(); @@ -528,13 +610,14 @@ static void kill_node(Node *e) /* /<entry> */ static ssize_t -bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) +bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { Node *e = file_inode(file)->i_private; ssize_t res; char *page; - if (!(page = (char*) __get_free_page(GFP_KERNEL))) + page = (char *) __get_free_page(GFP_KERNEL); + if (!page) return -ENOMEM; entry_status(e, page); @@ -553,20 +636,28 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer, int res = parse_command(buffer, count); switch (res) { - case 1: clear_bit(Enabled, &e->flags); - break; - case 2: set_bit(Enabled, &e->flags); - break; - case 3: root = dget(file->f_path.dentry->d_sb->s_root); - mutex_lock(&root->d_inode->i_mutex); - - kill_node(e); - - mutex_unlock(&root->d_inode->i_mutex); - dput(root); - break; - default: return res; + case 1: + /* Disable this handler. */ + clear_bit(Enabled, &e->flags); + break; + case 2: + /* Enable this handler. */ + set_bit(Enabled, &e->flags); + break; + case 3: + /* Delete this handler. */ + root = dget(file->f_path.dentry->d_sb->s_root); + mutex_lock(&root->d_inode->i_mutex); + + kill_node(e); + + mutex_unlock(&root->d_inode->i_mutex); + dput(root); + break; + default: + return res; } + return count; } @@ -654,26 +745,36 @@ bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s)); } -static ssize_t bm_status_write(struct file * file, const char __user * buffer, +static ssize_t bm_status_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { int res = parse_command(buffer, count); struct dentry *root; switch (res) { - case 1: enabled = 0; break; - case 2: enabled = 1; break; - case 3: root = dget(file->f_path.dentry->d_sb->s_root); - mutex_lock(&root->d_inode->i_mutex); - - while (!list_empty(&entries)) - kill_node(list_entry(entries.next, Node, list)); - - mutex_unlock(&root->d_inode->i_mutex); - dput(root); - break; - default: return res; + case 1: + /* Disable all handlers. */ + enabled = 0; + break; + case 2: + /* Enable all handlers. */ + enabled = 1; + break; + case 3: + /* Delete all handlers. */ + root = dget(file->f_path.dentry->d_sb->s_root); + mutex_lock(&root->d_inode->i_mutex); + + while (!list_empty(&entries)) + kill_node(list_entry(entries.next, Node, list)); + + mutex_unlock(&root->d_inode->i_mutex); + dput(root); + break; + default: + return res; } + return count; } @@ -690,14 +791,16 @@ static const struct super_operations s_ops = { .evict_inode = bm_evict_inode, }; -static int bm_fill_super(struct super_block * sb, void * data, int silent) +static int bm_fill_super(struct super_block *sb, void *data, int silent) { + int err; static struct tree_descr bm_files[] = { [2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO}, [3] = {"register", &bm_register_operations, S_IWUSR}, /* last one */ {""} }; - int err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files); + + err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files); if (!err) sb->s_op = &s_ops; return err; diff --git a/fs/block_dev.c b/fs/block_dev.c index 1d9c9f3754f..b48c41bf0f8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -235,7 +235,10 @@ struct super_block *freeze_bdev(struct block_device *bdev) sb = get_active_super(bdev); if (!sb) goto out; - error = freeze_super(sb); + if (sb->s_op->freeze_super) + error = sb->s_op->freeze_super(sb); + else + error = freeze_super(sb); if (error) { deactivate_super(sb); bdev->bd_fsfreeze_count--; @@ -272,7 +275,10 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb) if (!sb) goto out; - error = thaw_super(sb); + if (sb->s_op->thaw_super) + error = sb->s_op->thaw_super(sb); + else + error = thaw_super(sb); if (error) { bdev->bd_fsfreeze_count++; mutex_unlock(&bdev->bd_fsfreeze_mutex); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index d3220d31d3c..dcd9be32ac5 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1011,8 +1011,6 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, bytes = min(bytes, working_bytes); kaddr = kmap_atomic(page_out); memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); - if (*pg_index == (vcnt - 1) && *pg_offset == 0) - memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); kunmap_atomic(kaddr); flush_dcache_page(page_out); @@ -1054,3 +1052,34 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, return 1; } + +/* + * When uncompressing data, we need to make sure and zero any parts of + * the biovec that were not filled in by the decompression code. pg_index + * and pg_offset indicate the last page and the last offset of that page + * that have been filled in. This will zero everything remaining in the + * biovec. + */ +void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, + unsigned long pg_index, + unsigned long pg_offset) +{ + while (pg_index < vcnt) { + struct page *page = bvec[pg_index].bv_page; + unsigned long off = bvec[pg_index].bv_offset; + unsigned long len = bvec[pg_index].bv_len; + + if (pg_offset < off) + pg_offset = off; + if (pg_offset < off + len) { + unsigned long bytes = off + len - pg_offset; + char *kaddr; + + kaddr = kmap_atomic(page); + memset(kaddr + pg_offset, 0, bytes); + kunmap_atomic(kaddr); + } + pg_index++; + pg_offset = 0; + } +} diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 0c803b4fbf9..d181f70caae 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -45,7 +45,9 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long nr_pages); int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags); - +void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, + unsigned long pg_index, + unsigned long pg_offset); struct btrfs_compress_op { struct list_head *(*alloc_workspace)(void); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 19bc6162fb8..150822ee0a0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -80,13 +80,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, { int i; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* lockdep really cares that we take all of these spinlocks - * in the right order. If any of the locks in the path are not - * currently blocking, it is going to complain. So, make really - * really sure by forcing the path to blocking before we clear - * the path blocking. - */ if (held) { btrfs_set_lock_blocking_rw(held, held_rw); if (held_rw == BTRFS_WRITE_LOCK) @@ -95,7 +88,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, held_rw = BTRFS_READ_LOCK_BLOCKING; } btrfs_set_path_blocking(p); -#endif for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { if (p->nodes[i] && p->locks[i]) { @@ -107,10 +99,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, } } -#ifdef CONFIG_DEBUG_LOCK_ALLOC if (held) btrfs_clear_lock_blocking_rw(held, held_rw); -#endif } /* this also releases the path */ @@ -2893,7 +2883,7 @@ cow_done: } p->locks[level] = BTRFS_WRITE_LOCK; } else { - err = btrfs_try_tree_read_lock(b); + err = btrfs_tree_read_lock_atomic(b); if (!err) { btrfs_set_path_blocking(p); btrfs_tree_read_lock(b); @@ -3025,7 +3015,7 @@ again: } level = btrfs_header_level(b); - err = btrfs_try_tree_read_lock(b); + err = btrfs_tree_read_lock_atomic(b); if (!err) { btrfs_set_path_blocking(p); btrfs_tree_read_lock(b); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 783a94355ef..84a2d186827 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -413,7 +413,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, ret = 0; fail: while (ret < 0 && !list_empty(&tmplist)) { - sums = list_entry(&tmplist, struct btrfs_ordered_sum, list); + sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list); list_del(&sums->list); kfree(sums); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d23362f4464..ff0dcc016b7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5303,7 +5303,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_CAST(inode); } - return d_materialise_unique(dentry, inode); + return d_splice_alias(inode, dentry); } unsigned char btrfs_filetype_table[] = { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4399f0c3a4c..080fe66c034 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -5296,7 +5296,7 @@ long btrfs_ioctl(struct file *file, unsigned int ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); if (ret) return ret; - ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); + ret = btrfs_sync_fs(file_inode(file)->i_sb, 1); /* * The transaction thread may want to do more work, * namely it pokes the cleaner ktread that will start diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 5665d214924..f8229ef1b46 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -128,6 +128,26 @@ again: } /* + * take a spinning read lock. + * returns 1 if we get the read lock and 0 if we don't + * this won't wait for blocking writers + */ +int btrfs_tree_read_lock_atomic(struct extent_buffer *eb) +{ + if (atomic_read(&eb->blocking_writers)) + return 0; + + read_lock(&eb->lock); + if (atomic_read(&eb->blocking_writers)) { + read_unlock(&eb->lock); + return 0; + } + atomic_inc(&eb->read_locks); + atomic_inc(&eb->spinning_readers); + return 1; +} + +/* * returns 1 if we get the read lock and 0 if we don't * this won't wait for blocking writers */ @@ -158,9 +178,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) atomic_read(&eb->blocking_readers)) return 0; - if (!write_trylock(&eb->lock)) - return 0; - + write_lock(&eb->lock); if (atomic_read(&eb->blocking_writers) || atomic_read(&eb->blocking_readers)) { write_unlock(&eb->lock); diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index b81e0e9a489..c44a9d5f536 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -35,6 +35,8 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw); void btrfs_assert_tree_locked(struct extent_buffer *eb); int btrfs_try_tree_read_lock(struct extent_buffer *eb); int btrfs_try_tree_write_lock(struct extent_buffer *eb); +int btrfs_tree_read_lock_atomic(struct extent_buffer *eb); + static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) { diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 78285f30909..617553cdb7d 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -373,6 +373,8 @@ cont: } done: kunmap(pages_in[page_in_index]); + if (!ret) + btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset); return ret; } @@ -410,10 +412,23 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in, goto out; } + /* + * the caller is already checking against PAGE_SIZE, but lets + * move this check closer to the memcpy/memset + */ + destlen = min_t(unsigned long, destlen, PAGE_SIZE); bytes = min_t(unsigned long, destlen, out_len - start_byte); kaddr = kmap_atomic(dest_page); memcpy(kaddr, workspace->buf + start_byte, bytes); + + /* + * btrfs_getblock is doing a zero on the tail of the page too, + * but this will cover anything missing from the decompressed + * data. + */ + if (bytes < destlen) + memset(kaddr+bytes, 0, destlen-bytes); kunmap_atomic(kaddr); out: return ret; diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 759fa4e2de8..fb22fd8d8fb 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -299,6 +299,8 @@ done: zlib_inflateEnd(&workspace->strm); if (data_in) kunmap(pages_in[page_in_index]); + if (!ret) + btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset); return ret; } @@ -310,10 +312,14 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, struct workspace *workspace = list_entry(ws, struct workspace, list); int ret = 0; int wbits = MAX_WBITS; - unsigned long bytes_left = destlen; + unsigned long bytes_left; unsigned long total_out = 0; + unsigned long pg_offset = 0; char *kaddr; + destlen = min_t(unsigned long, destlen, PAGE_SIZE); + bytes_left = destlen; + workspace->strm.next_in = data_in; workspace->strm.avail_in = srclen; workspace->strm.total_in = 0; @@ -341,7 +347,6 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, unsigned long buf_start; unsigned long buf_offset; unsigned long bytes; - unsigned long pg_offset = 0; ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH); if (ret != Z_OK && ret != Z_STREAM_END) @@ -384,6 +389,17 @@ next: ret = 0; zlib_inflateEnd(&workspace->strm); + + /* + * this should only happen if zlib returned fewer bytes than we + * expected. btrfs_get_block is responsible for zeroing from the + * end of the inline extent (destlen) to the end of the page + */ + if (pg_offset < destlen) { + kaddr = kmap_atomic(dest_page); + memset(kaddr + pg_offset, 0, destlen - pg_offset); + kunmap_atomic(kaddr); + } return ret; } diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index e12f189d539..7f8e83f9d74 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -102,8 +102,7 @@ static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, struct cachefiles_object *object; struct rb_node *p; - _enter(",'%*.*s'", - dentry->d_name.len, dentry->d_name.len, dentry->d_name.name); + _enter(",'%pd'", dentry); write_lock(&cache->active_lock); @@ -273,9 +272,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, char nbuffer[8 + 8 + 1]; int ret; - _enter(",'%*.*s','%*.*s'", - dir->d_name.len, dir->d_name.len, dir->d_name.name, - rep->d_name.len, rep->d_name.len, rep->d_name.name); + _enter(",'%pd','%pd'", dir, rep); _debug("remove %p from %p", rep, dir); @@ -597,8 +594,7 @@ lookup_again: /* if we've found that the terminal object exists, then we need to * check its attributes and delete it if it's out of date */ if (!object->new) { - _debug("validate '%*.*s'", - next->d_name.len, next->d_name.len, next->d_name.name); + _debug("validate '%pd'", next); ret = cachefiles_check_object_xattr(object, auxdata); if (ret == -ESTALE) { @@ -827,8 +823,8 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache, unsigned long start; int ret; - //_enter(",%*.*s/,%s", - // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); + //_enter(",%pd/,%s", + // dir, filename); /* look up the victim */ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); @@ -910,8 +906,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, struct dentry *victim; int ret; - _enter(",%*.*s/,%s", - dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); + _enter(",%pd/,%s", dir, filename); victim = cachefiles_check_active(cache, dir, filename); if (IS_ERR(victim)) @@ -969,8 +964,8 @@ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, { struct dentry *victim; - //_enter(",%*.*s/,%s", - // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); + //_enter(",%pd/,%s", + // dir, filename); victim = cachefiles_check_active(cache, dir, filename); if (IS_ERR(victim)) diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index acbc1f094fb..a8a68745e11 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -51,9 +51,8 @@ int cachefiles_check_object_type(struct cachefiles_object *object) } if (ret != -EEXIST) { - pr_err("Can't set xattr on %*.*s [%lu] (err %d)\n", - dentry->d_name.len, dentry->d_name.len, - dentry->d_name.name, dentry->d_inode->i_ino, + pr_err("Can't set xattr on %pd [%lu] (err %d)\n", + dentry, dentry->d_inode->i_ino, -ret); goto error; } @@ -64,9 +63,8 @@ int cachefiles_check_object_type(struct cachefiles_object *object) if (ret == -ERANGE) goto bad_type_length; - pr_err("Can't read xattr on %*.*s [%lu] (err %d)\n", - dentry->d_name.len, dentry->d_name.len, - dentry->d_name.name, dentry->d_inode->i_ino, + pr_err("Can't read xattr on %pd [%lu] (err %d)\n", + dentry, dentry->d_inode->i_ino, -ret); goto error; } @@ -92,9 +90,8 @@ bad_type_length: bad_type: xtype[2] = 0; - pr_err("Cache object %*.*s [%lu] type %s not %s\n", - dentry->d_name.len, dentry->d_name.len, - dentry->d_name.name, dentry->d_inode->i_ino, + pr_err("Cache object %pd [%lu] type %s not %s\n", + dentry, dentry->d_inode->i_ino, xtype, type); ret = -EIO; goto error; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 659f2ea9e6f..cefca661464 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2638,7 +2638,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, for (i = 0; i < CEPH_CAP_BITS; i++) if ((dirty & (1 << i)) && - flush_tid == ci->i_cap_flush_tid[i]) + (u16)flush_tid == ci->i_cap_flush_tid[i]) cleaned |= 1 << i; dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 5d5a4c8c849..1b2355109b9 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -83,10 +83,9 @@ static int mdsc_show(struct seq_file *s, void *p) if (IS_ERR(path)) path = NULL; spin_lock(&req->r_dentry->d_lock); - seq_printf(s, " #%llx/%.*s (%s)", + seq_printf(s, " #%llx/%pd (%s)", ceph_ino(req->r_dentry->d_parent->d_inode), - req->r_dentry->d_name.len, - req->r_dentry->d_name.name, + req->r_dentry, path ? path : ""); spin_unlock(&req->r_dentry->d_lock); kfree(path); @@ -103,11 +102,10 @@ static int mdsc_show(struct seq_file *s, void *p) if (IS_ERR(path)) path = NULL; spin_lock(&req->r_old_dentry->d_lock); - seq_printf(s, " #%llx/%.*s (%s)", + seq_printf(s, " #%llx/%pd (%s)", req->r_old_dentry_dir ? ceph_ino(req->r_old_dentry_dir) : 0, - req->r_old_dentry->d_name.len, - req->r_old_dentry->d_name.name, + req->r_old_dentry, path ? path : ""); spin_unlock(&req->r_old_dentry->d_lock); kfree(path); @@ -150,8 +148,8 @@ static int dentry_lru_show(struct seq_file *s, void *ptr) spin_lock(&mdsc->dentry_lru_lock); list_for_each_entry(di, &mdsc->dentry_lru, lru) { struct dentry *dentry = di->dentry; - seq_printf(s, "%p %p\t%.*s\n", - di, dentry, dentry->d_name.len, dentry->d_name.name); + seq_printf(s, "%p %p\t%pd\n", + di, dentry, dentry); } spin_unlock(&mdsc->dentry_lru_lock); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index e6d63f8f98c..681a8537b64 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -111,7 +111,7 @@ static int fpos_cmp(loff_t l, loff_t r) /* * When possible, we try to satisfy a readdir by peeking at the * dcache. We make this work by carefully ordering dentries on - * d_u.d_child when we initially get results back from the MDS, and + * d_child when we initially get results back from the MDS, and * falling back to a "normal" sync readdir if any dentries in the dir * are dropped. * @@ -123,7 +123,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, u32 shared_gen) { struct ceph_file_info *fi = file->private_data; - struct dentry *parent = file->f_dentry; + struct dentry *parent = file->f_path.dentry; struct inode *dir = parent->d_inode; struct list_head *p; struct dentry *dentry, *last; @@ -147,11 +147,11 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, p = parent->d_subdirs.prev; dout(" initial p %p/%p\n", p->prev, p->next); } else { - p = last->d_u.d_child.prev; + p = last->d_child.prev; } more: - dentry = list_entry(p, struct dentry, d_u.d_child); + dentry = list_entry(p, struct dentry, d_child); di = ceph_dentry(dentry); while (1) { dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, @@ -168,13 +168,13 @@ more: ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && fpos_cmp(ctx->pos, di->offset) <= 0) break; - dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry, - dentry->d_name.len, dentry->d_name.name, di->offset, + dout(" skipping %p %pd at %llu (%llu)%s%s\n", dentry, + dentry, di->offset, ctx->pos, d_unhashed(dentry) ? " unhashed" : "", !dentry->d_inode ? " null" : ""); spin_unlock(&dentry->d_lock); p = p->prev; - dentry = list_entry(p, struct dentry, d_u.d_child); + dentry = list_entry(p, struct dentry, d_child); di = ceph_dentry(dentry); } @@ -190,8 +190,8 @@ more: goto out; } - dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos, - dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); + dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos, + dentry, dentry, dentry->d_inode); if (!dir_emit(ctx, dentry->d_name.name, dentry->d_name.len, ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino), @@ -274,7 +274,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) off = 1; } if (ctx->pos == 1) { - ino_t ino = parent_ino(file->f_dentry); + ino_t ino = parent_ino(file->f_path.dentry); dout("readdir off 1 -> '..'\n"); if (!dir_emit(ctx, "..", 2, ceph_translate_ino(inode->i_sb, ino), @@ -337,7 +337,7 @@ more: } req->r_inode = inode; ihold(inode); - req->r_dentry = dget(file->f_dentry); + req->r_dentry = dget(file->f_path.dentry); /* hints to request -> mds selection code */ req->r_direct_mode = USE_AUTH_MDS; req->r_direct_hash = ceph_frag_value(frag); @@ -538,8 +538,8 @@ int ceph_handle_snapdir(struct ceph_mds_request *req, strcmp(dentry->d_name.name, fsc->mount_options->snapdir_name) == 0) { struct inode *inode = ceph_get_snapdir(parent); - dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", - dentry, dentry->d_name.len, dentry->d_name.name, inode); + dout("ENOENT on snapdir %p '%pd', linking to snapdir %p\n", + dentry, dentry, inode); BUG_ON(!d_unhashed(dentry)); d_add(dentry, inode); err = 0; @@ -603,8 +603,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, int op; int err; - dout("lookup %p dentry %p '%.*s'\n", - dir, dentry, dentry->d_name.len, dentry->d_name.name); + dout("lookup %p dentry %p '%pd'\n", + dir, dentry, dentry); if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); @@ -774,8 +774,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (ceph_snap(dir) == CEPH_SNAPDIR) { /* mkdir .snap/foo is a MKSNAP */ op = CEPH_MDS_OP_MKSNAP; - dout("mksnap dir %p snap '%.*s' dn %p\n", dir, - dentry->d_name.len, dentry->d_name.name, dentry); + dout("mksnap dir %p snap '%pd' dn %p\n", dir, + dentry, dentry); } else if (ceph_snap(dir) == CEPH_NOSNAP) { dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode); op = CEPH_MDS_OP_MKDIR; @@ -888,8 +888,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) if (ceph_snap(dir) == CEPH_SNAPDIR) { /* rmdir .snap/foo is RMSNAP */ - dout("rmsnap dir %p '%.*s' dn %p\n", dir, dentry->d_name.len, - dentry->d_name.name, dentry); + dout("rmsnap dir %p '%pd' dn %p\n", dir, dentry, dentry); op = CEPH_MDS_OP_RMSNAP; } else if (ceph_snap(dir) == CEPH_NOSNAP) { dout("unlink/rmdir dir %p dn %p inode %p\n", @@ -1063,16 +1062,15 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) if (flags & LOOKUP_RCU) return -ECHILD; - dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, - dentry->d_name.len, dentry->d_name.name, dentry->d_inode, - ceph_dentry(dentry)->offset); + dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry, + dentry, dentry->d_inode, ceph_dentry(dentry)->offset); dir = ceph_get_dentry_parent_inode(dentry); /* always trust cached snapped dentries, snapdir dentry */ if (ceph_snap(dir) != CEPH_NOSNAP) { - dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry, - dentry->d_name.len, dentry->d_name.name, dentry->d_inode); + dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry, + dentry, dentry->d_inode); valid = 1; } else if (dentry->d_inode && ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) { @@ -1265,8 +1263,7 @@ void ceph_dentry_lru_add(struct dentry *dn) struct ceph_dentry_info *di = ceph_dentry(dn); struct ceph_mds_client *mdsc; - dout("dentry_lru_add %p %p '%.*s'\n", di, dn, - dn->d_name.len, dn->d_name.name); + dout("dentry_lru_add %p %p '%pd'\n", di, dn, dn); mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_lru_lock); list_add_tail(&di->lru, &mdsc->dentry_lru); @@ -1279,8 +1276,8 @@ void ceph_dentry_lru_touch(struct dentry *dn) struct ceph_dentry_info *di = ceph_dentry(dn); struct ceph_mds_client *mdsc; - dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, - dn->d_name.len, dn->d_name.name, di->offset); + dout("dentry_lru_touch %p %p '%pd' (offset %lld)\n", di, dn, dn, + di->offset); mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_lru_lock); list_move_tail(&di->lru, &mdsc->dentry_lru); @@ -1292,8 +1289,7 @@ void ceph_dentry_lru_del(struct dentry *dn) struct ceph_dentry_info *di = ceph_dentry(dn); struct ceph_mds_client *mdsc; - dout("dentry_lru_del %p %p '%.*s'\n", di, dn, - dn->d_name.len, dn->d_name.name); + dout("dentry_lru_del %p %p '%pd'\n", di, dn, dn); mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_lru_lock); list_del_init(&di->lru); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d7e0da8366e..9f8e3572040 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -211,7 +211,7 @@ int ceph_open(struct inode *inode, struct file *file) req->r_num_caps = 1; if (flags & O_CREAT) - parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); + parent_inode = ceph_get_dentry_parent_inode(file->f_path.dentry); err = ceph_mdsc_do_request(mdsc, parent_inode, req); iput(parent_inode); if (!err) @@ -238,8 +238,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct ceph_acls_info acls = {}; int err; - dout("atomic_open %p dentry %p '%.*s' %s flags %d mode 0%o\n", - dir, dentry, dentry->d_name.len, dentry->d_name.name, + dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n", + dir, dentry, dentry, d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); if (dentry->d_name.len > NAME_MAX) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 7b613900440..a5593d51d03 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -967,7 +967,7 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, /* dn must be unhashed */ if (!d_unhashed(dn)) d_drop(dn); - realdn = d_materialise_unique(dn, in); + realdn = d_splice_alias(in, dn); if (IS_ERR(realdn)) { pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", PTR_ERR(realdn), dn, in, ceph_vinop(in)); @@ -1186,20 +1186,18 @@ retry_lookup: struct inode *olddir = req->r_old_dentry_dir; BUG_ON(!olddir); - dout(" src %p '%.*s' dst %p '%.*s'\n", + dout(" src %p '%pd' dst %p '%pd'\n", req->r_old_dentry, - req->r_old_dentry->d_name.len, - req->r_old_dentry->d_name.name, - dn, dn->d_name.len, dn->d_name.name); + req->r_old_dentry, + dn, dn); dout("fill_trace doing d_move %p -> %p\n", req->r_old_dentry, dn); d_move(req->r_old_dentry, dn); - dout(" src %p '%.*s' dst %p '%.*s'\n", + dout(" src %p '%pd' dst %p '%pd'\n", + req->r_old_dentry, req->r_old_dentry, - req->r_old_dentry->d_name.len, - req->r_old_dentry->d_name.name, - dn, dn->d_name.len, dn->d_name.name); + dn, dn); /* ensure target dentry is invalidated, despite rehashing bug in vfs_rename_dir */ @@ -1399,7 +1397,7 @@ retry_lookup: /* reorder parent's d_subdirs */ spin_lock(&parent->d_lock); spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&dn->d_u.d_child, &parent->d_subdirs); + list_move(&dn->d_child, &parent->d_subdirs); spin_unlock(&dn->d_lock); spin_unlock(&parent->d_lock); } diff --git a/fs/char_dev.c b/fs/char_dev.c index f77f7702fab..67b2007f10f 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -117,7 +117,6 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, goto out; } major = i; - ret = major; } cd->major = major; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 44ec72684df..9c56ef77640 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -34,27 +34,9 @@ void cifs_dump_mem(char *label, void *data, int length) { - int i, j; - int *intptr = data; - char *charptr = data; - char buf[10], line[80]; - - printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n", - label, length, data); - for (i = 0; i < length; i += 16) { - line[0] = 0; - for (j = 0; (j < 4) && (i + j * 4 < length); j++) { - sprintf(buf, " %08x", intptr[i / 4 + j]); - strcat(line, buf); - } - buf[0] = ' '; - buf[2] = 0; - for (j = 0; (j < 16) && (i + j < length); j++) { - buf[1] = isprint(charptr[i + j]) ? charptr[i + j] : '.'; - strcat(line, buf); - } - printk(KERN_DEBUG "%s\n", line); - } + pr_debug("%s: dump of %d bytes of data at 0x%p\n", label, length, data); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 16, 4, + data, length, true); } #ifdef CONFIG_CIFS_DEBUG @@ -68,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - printk(KERN_ERR "CIFS VFS: %pV", &vaf); + pr_err("CIFS VFS: %pV", &vaf); va_end(args); } @@ -274,6 +256,7 @@ static ssize_t cifs_stats_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { char c; + bool bv; int rc; struct list_head *tmp1, *tmp2, *tmp3; struct TCP_Server_Info *server; @@ -284,7 +267,7 @@ static ssize_t cifs_stats_proc_write(struct file *file, if (rc) return rc; - if (c == '1' || c == 'y' || c == 'Y' || c == '0') { + if (strtobool(&c, &bv) == 0) { #ifdef CONFIG_CIFS_STATS2 atomic_set(&totBufAllocCount, 0); atomic_set(&totSmBufAllocCount, 0); @@ -451,15 +434,14 @@ static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { char c; + bool bv; int rc; rc = get_user(c, buffer); if (rc) return rc; - if (c == '0' || c == 'n' || c == 'N') - cifsFYI = 0; - else if (c == '1' || c == 'y' || c == 'Y') - cifsFYI = 1; + if (strtobool(&c, &bv) == 0) + cifsFYI = bv; else if ((c > '1') && (c <= '9')) cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */ @@ -490,15 +472,18 @@ static ssize_t cifs_linux_ext_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { char c; + bool bv; int rc; rc = get_user(c, buffer); if (rc) return rc; - if (c == '0' || c == 'n' || c == 'N') - linuxExtEnabled = 0; - else if (c == '1' || c == 'y' || c == 'Y') - linuxExtEnabled = 1; + + rc = strtobool(&c, &bv); + if (rc) + return rc; + + linuxExtEnabled = bv; return count; } @@ -527,15 +512,18 @@ static ssize_t cifs_lookup_cache_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { char c; + bool bv; int rc; rc = get_user(c, buffer); if (rc) return rc; - if (c == '0' || c == 'n' || c == 'N') - lookupCacheEnabled = 0; - else if (c == '1' || c == 'y' || c == 'Y') - lookupCacheEnabled = 1; + + rc = strtobool(&c, &bv); + if (rc) + return rc; + + lookupCacheEnabled = bv; return count; } @@ -564,15 +552,18 @@ static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { char c; + bool bv; int rc; rc = get_user(c, buffer); if (rc) return rc; - if (c == '0' || c == 'n' || c == 'N') - traceSMB = 0; - else if (c == '1' || c == 'y' || c == 'Y') - traceSMB = 1; + + rc = strtobool(&c, &bv); + if (rc) + return rc; + + traceSMB = bv; return count; } @@ -630,6 +621,7 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, unsigned int flags; char flags_string[12]; char c; + bool bv; if ((count < 1) || (count > 11)) return -EINVAL; @@ -642,11 +634,8 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, if (count < 3) { /* single char or single char followed by null */ c = flags_string[0]; - if (c == '0' || c == 'n' || c == 'N') { - global_secflags = CIFSSEC_DEF; /* default */ - return count; - } else if (c == '1' || c == 'y' || c == 'Y') { - global_secflags = CIFSSEC_MAX; + if (strtobool(&c, &bv) == 0) { + global_secflags = bv ? CIFSSEC_MAX : CIFSSEC_DEF; return count; } else if (!isdigit(c)) { cifs_dbg(VFS, "Invalid SecurityFlags: %s\n", diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index c99b40fb609..f40fbaca1b2 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h @@ -53,13 +53,12 @@ __printf(1, 2) void cifs_vfs_err(const char *fmt, ...); do { \ if (type == FYI) { \ if (cifsFYI & CIFS_INFO) { \ - printk(KERN_DEBUG "%s: " fmt, \ - __FILE__, ##__VA_ARGS__); \ + pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__); \ } \ } else if (type == VFS) { \ cifs_vfs_err(fmt, ##__VA_ARGS__); \ } else if (type == NOISY && type != 0) { \ - printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ + pr_debug(fmt, ##__VA_ARGS__); \ } \ } while (0) @@ -71,7 +70,7 @@ do { \ #define cifs_dbg(type, fmt, ...) \ do { \ if (0) \ - printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ + pr_debug(fmt, ##__VA_ARGS__); \ } while (0) #endif diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 6d00c419cba..1ea780bc637 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -38,7 +38,7 @@ static const struct cifs_sid sid_everyone = { 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; /* security id for Authenticated Users system group */ static const struct cifs_sid sid_authusers = { - 1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11)} }; + 1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11)} }; /* group users */ static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} }; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 9d7996e8e79..d72fe37f542 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -209,8 +209,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) { - struct super_block *sb = file->f_path.dentry->d_sb; - struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); struct TCP_Server_Info *server = tcon->ses->server; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 002e0c17393..252f5c15806 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.05" +#define CIFS_VERSION "2.06" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 02a33e52990..6e139111fdb 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1168,6 +1168,12 @@ CIFS_SB(struct super_block *sb) return sb->s_fs_info; } +static inline struct cifs_sb_info * +CIFS_FILE_SB(struct file *file) +{ + return CIFS_SB(file_inode(file)->i_sb); +} + static inline char CIFS_DIR_SEP(const struct cifs_sb_info *cifs_sb) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 61d00a6e398..fa13d5e79f6 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2477,14 +2477,14 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon, } parm_data = (struct cifs_posix_lock *) ((char *)&pSMBr->hdr.Protocol + data_offset); - if (parm_data->lock_type == __constant_cpu_to_le16(CIFS_UNLCK)) + if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK)) pLockData->fl_type = F_UNLCK; else { if (parm_data->lock_type == - __constant_cpu_to_le16(CIFS_RDLCK)) + cpu_to_le16(CIFS_RDLCK)) pLockData->fl_type = F_RDLCK; else if (parm_data->lock_type == - __constant_cpu_to_le16(CIFS_WRLCK)) + cpu_to_le16(CIFS_WRLCK)) pLockData->fl_type = F_WRLCK; pLockData->fl_start = le64_to_cpu(parm_data->start); @@ -3276,25 +3276,25 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon, pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT); pSMB->TotalParameterCount = 0; - pSMB->TotalDataCount = __constant_cpu_to_le32(2); + pSMB->TotalDataCount = cpu_to_le32(2); pSMB->MaxParameterCount = 0; pSMB->MaxDataCount = 0; pSMB->MaxSetupCount = 4; pSMB->Reserved = 0; pSMB->ParameterOffset = 0; - pSMB->DataCount = __constant_cpu_to_le32(2); + pSMB->DataCount = cpu_to_le32(2); pSMB->DataOffset = cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req, compression_state) - 4); /* 84 */ pSMB->SetupCount = 4; - pSMB->SubCommand = __constant_cpu_to_le16(NT_TRANSACT_IOCTL); + pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL); pSMB->ParameterCount = 0; - pSMB->FunctionCode = __constant_cpu_to_le32(FSCTL_SET_COMPRESSION); + pSMB->FunctionCode = cpu_to_le32(FSCTL_SET_COMPRESSION); pSMB->IsFsctl = 1; /* FSCTL */ pSMB->IsRootFlag = 0; pSMB->Fid = fid; /* file handle always le */ /* 3 byte pad, followed by 2 byte compress state */ - pSMB->ByteCount = __constant_cpu_to_le16(5); + pSMB->ByteCount = cpu_to_le16(5); inc_rfc1001_len(pSMB, 5); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, @@ -3430,10 +3430,10 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, cifs_acl->version = cpu_to_le16(1); if (acl_type == ACL_TYPE_ACCESS) { cifs_acl->access_entry_count = cpu_to_le16(count); - cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF); + cifs_acl->default_entry_count = cpu_to_le16(0xFFFF); } else if (acl_type == ACL_TYPE_DEFAULT) { cifs_acl->default_entry_count = cpu_to_le16(count); - cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF); + cifs_acl->access_entry_count = cpu_to_le16(0xFFFF); } else { cifs_dbg(FYI, "unknown ACL type %d\n", acl_type); return 0; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 24fa08d261f..2a772da16b8 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1466,9 +1466,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->seal = 1; break; case Opt_noac: - printk(KERN_WARNING "CIFS: Mount option noac not " - "supported. Instead set " - "/proc/fs/cifs/LookupCacheEnabled to 0\n"); + pr_warn("CIFS: Mount option noac not supported. Instead set /proc/fs/cifs/LookupCacheEnabled to 0\n"); break; case Opt_fsc: #ifndef CONFIG_CIFS_FSCACHE @@ -1598,7 +1596,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (strnlen(string, CIFS_MAX_USERNAME_LEN) > CIFS_MAX_USERNAME_LEN) { - printk(KERN_WARNING "CIFS: username too long\n"); + pr_warn("CIFS: username too long\n"); goto cifs_parse_mount_err; } vol->username = kstrdup(string, GFP_KERNEL); @@ -1662,8 +1660,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, temp_len = strlen(value); vol->password = kzalloc(temp_len+1, GFP_KERNEL); if (vol->password == NULL) { - printk(KERN_WARNING "CIFS: no memory " - "for password\n"); + pr_warn("CIFS: no memory for password\n"); goto cifs_parse_mount_err; } @@ -1687,8 +1684,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (!cifs_convert_address(dstaddr, string, strlen(string))) { - printk(KERN_ERR "CIFS: bad ip= option (%s).\n", - string); + pr_err("CIFS: bad ip= option (%s).\n", string); goto cifs_parse_mount_err; } got_ip = true; @@ -1700,15 +1696,13 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN) == CIFS_MAX_DOMAINNAME_LEN) { - printk(KERN_WARNING "CIFS: domain name too" - " long\n"); + pr_warn("CIFS: domain name too long\n"); goto cifs_parse_mount_err; } vol->domainname = kstrdup(string, GFP_KERNEL); if (!vol->domainname) { - printk(KERN_WARNING "CIFS: no memory " - "for domainname\n"); + pr_warn("CIFS: no memory for domainname\n"); goto cifs_parse_mount_err; } cifs_dbg(FYI, "Domain name set\n"); @@ -1721,8 +1715,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (!cifs_convert_address( (struct sockaddr *)&vol->srcaddr, string, strlen(string))) { - printk(KERN_WARNING "CIFS: Could not parse" - " srcaddr: %s\n", string); + pr_warn("CIFS: Could not parse srcaddr: %s\n", + string); goto cifs_parse_mount_err; } break; @@ -1732,8 +1726,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto out_nomem; if (strnlen(string, 1024) >= 65) { - printk(KERN_WARNING "CIFS: iocharset name " - "too long.\n"); + pr_warn("CIFS: iocharset name too long.\n"); goto cifs_parse_mount_err; } @@ -1741,8 +1734,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->iocharset = kstrdup(string, GFP_KERNEL); if (!vol->iocharset) { - printk(KERN_WARNING "CIFS: no memory" - "for charset\n"); + pr_warn("CIFS: no memory for charset\n"); goto cifs_parse_mount_err; } } @@ -1773,9 +1765,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, * set at top of the function */ if (i == RFC1001_NAME_LEN && string[i] != 0) - printk(KERN_WARNING "CIFS: netbiosname" - " longer than 15 truncated.\n"); - + pr_warn("CIFS: netbiosname longer than 15 truncated.\n"); break; case Opt_servern: /* servernetbiosname specified override *SMBSERVER */ @@ -1801,8 +1791,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, /* The string has 16th byte zero still from set at top of the function */ if (i == RFC1001_NAME_LEN && string[i] != 0) - printk(KERN_WARNING "CIFS: server net" - "biosname longer than 15 truncated.\n"); + pr_warn("CIFS: server netbiosname longer than 15 truncated.\n"); break; case Opt_ver: string = match_strdup(args); @@ -1814,8 +1803,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, break; } /* For all other value, error */ - printk(KERN_WARNING "CIFS: Invalid version" - " specified\n"); + pr_warn("CIFS: Invalid version specified\n"); goto cifs_parse_mount_err; case Opt_vers: string = match_strdup(args); @@ -1856,7 +1844,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, } if (!sloppy && invalid) { - printk(KERN_ERR "CIFS: Unknown mount option \"%s\"\n", invalid); + pr_err("CIFS: Unknown mount option \"%s\"\n", invalid); goto cifs_parse_mount_err; } @@ -1882,8 +1870,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, /* No ip= option specified? Try to get it from UNC */ if (!cifs_convert_address(dstaddr, &vol->UNC[2], strlen(&vol->UNC[2]))) { - printk(KERN_ERR "Unable to determine destination " - "address.\n"); + pr_err("Unable to determine destination address.\n"); goto cifs_parse_mount_err; } } @@ -1894,20 +1881,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (uid_specified) vol->override_uid = override_uid; else if (override_uid == 1) - printk(KERN_NOTICE "CIFS: ignoring forceuid mount option " - "specified with no uid= option.\n"); + pr_notice("CIFS: ignoring forceuid mount option specified with no uid= option.\n"); if (gid_specified) vol->override_gid = override_gid; else if (override_gid == 1) - printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " - "specified with no gid= option.\n"); + pr_notice("CIFS: ignoring forcegid mount option specified with no gid= option.\n"); kfree(mountdata_copy); return 0; out_nomem: - printk(KERN_WARNING "Could not allocate temporary buffer\n"); + pr_warn("Could not allocate temporary buffer\n"); cifs_parse_mount_err: kfree(string); kfree(mountdata_copy); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 3e4d00a06c4..96b7e9b7706 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1066,7 +1066,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) max_num = (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE); - buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); + buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); if (!buf) { free_xid(xid); return -ENOMEM; @@ -1401,7 +1401,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, max_num = (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE); - buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); + buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); if (!buf) return -ENOMEM; @@ -1586,7 +1586,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, tcon->ses->server); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + cifs_sb = CIFS_FILE_SB(file); netfid = cfile->fid.netfid; cinode = CIFS_I(file_inode(file)); @@ -2305,7 +2305,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct cifs_tcon *tcon; struct TCP_Server_Info *server; struct cifsFileInfo *smbfile = file->private_data; - struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); struct inode *inode = file->f_mapping->host; rc = filemap_write_and_wait_range(inode->i_mapping, start, end); @@ -2585,7 +2585,7 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) iov_iter_truncate(from, len); INIT_LIST_HEAD(&wdata_list); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + cifs_sb = CIFS_FILE_SB(file); open_file = file->private_data; tcon = tlink_tcon(open_file->tlink); @@ -3010,7 +3010,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) return 0; INIT_LIST_HEAD(&rdata_list); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + cifs_sb = CIFS_FILE_SB(file); open_file = file->private_data; tcon = tlink_tcon(open_file->tlink); @@ -3155,7 +3155,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) __u32 pid; xid = get_xid(); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + cifs_sb = CIFS_FILE_SB(file); /* FIXME: set up handlers for larger reads and/or convert to async */ rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize); @@ -3462,7 +3462,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, int rc; struct list_head tmplist; struct cifsFileInfo *open_file = file->private_data; - struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); struct TCP_Server_Info *server; pid_t pid; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 197cb503d52..0c3ce464cae 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -895,7 +895,7 @@ inode_has_hashed_dentries(struct inode *inode) struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { spin_unlock(&inode->i_lock); return true; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index b7415d596db..337946355b2 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -513,39 +513,11 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) void dump_smb(void *buf, int smb_buf_length) { - int i, j; - char debug_line[17]; - unsigned char *buffer = buf; - if (traceSMB == 0) return; - for (i = 0, j = 0; i < smb_buf_length; i++, j++) { - if (i % 8 == 0) { - /* have reached the beginning of line */ - printk(KERN_DEBUG "| "); - j = 0; - } - printk("%0#4x ", buffer[i]); - debug_line[2 * j] = ' '; - if (isprint(buffer[i])) - debug_line[1 + (2 * j)] = buffer[i]; - else - debug_line[1 + (2 * j)] = '_'; - - if (i % 8 == 7) { - /* reached end of line, time to print ascii */ - debug_line[16] = 0; - printk(" | %s\n", debug_line); - } - } - for (; j < 8; j++) { - printk(" "); - debug_line[2 * j] = ' '; - debug_line[1 + (2 * j)] = ' '; - } - printk(" | %s\n", debug_line); - return; + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 8, 2, buf, + smb_buf_length, true); } void diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 8fd2a95860b..8eaf20a8064 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -123,7 +123,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, if (!inode) goto out; - alias = d_materialise_unique(dentry, inode); + alias = d_splice_alias(inode, dentry); if (alias && !IS_ERR(alias)) dput(alias); out: @@ -261,7 +261,7 @@ initiate_cifs_search(const unsigned int xid, struct file *file) int rc = 0; char *full_path = NULL; struct cifsFileInfo *cifsFile; - struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); struct tcon_link *tlink = NULL; struct cifs_tcon *tcon; struct TCP_Server_Info *server; @@ -561,7 +561,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, loff_t first_entry_in_buffer; loff_t index_to_find = pos; struct cifsFileInfo *cfile = file->private_data; - struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); struct TCP_Server_Info *server = tcon->ses->server; /* check if index in the buffer */ @@ -679,7 +679,7 @@ static int cifs_filldir(char *find_entry, struct file *file, char *scratch_buf, unsigned int max_len) { struct cifsFileInfo *file_info = file->private_data; - struct super_block *sb = file->f_path.dentry->d_sb; + struct super_block *sb = file_inode(file)->i_sb; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifs_dirent de = { NULL, }; struct cifs_fattr fattr; @@ -753,7 +753,7 @@ static int cifs_filldir(char *find_entry, struct file *file, */ fattr.cf_flags |= CIFS_FATTR_NEED_REVAL; - cifs_prime_dcache(file->f_dentry, &name, &fattr); + cifs_prime_dcache(file->f_path.dentry, &name, &fattr); ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); return !dir_emit(ctx, name.name, name.len, ino, fattr.cf_dtype); @@ -794,10 +794,6 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) if it before then restart search if after then keep searching till find it */ - if (file->private_data == NULL) { - rc = -EINVAL; - goto rddir2_exit; - } cifsFile = file->private_data; if (cifsFile->srch_inf.endOfSearch) { if (cifsFile->srch_inf.emptyDir) { diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 57db63ff88d..bce6fdcd5d4 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -46,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4, USHRT_MAX)); pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); - pSMB->req.VcNumber = __constant_cpu_to_le16(1); + pSMB->req.VcNumber = cpu_to_le16(1); /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */ @@ -1303,6 +1303,11 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN) cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ + if (ses->Suid != smb_buf->Uid) { + ses->Suid = smb_buf->Uid; + cifs_dbg(FYI, "UID changed! new UID = %llu\n", ses->Suid); + } + bytes_remaining = get_bcc(smb_buf); bcc_ptr = pByteArea(smb_buf); blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 45992944e23..7198eac5ddd 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -111,7 +111,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, return -EINVAL; max_num = max_buf / sizeof(struct smb2_lock_element); - buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL); + buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL); if (!buf) return -ENOMEM; @@ -247,7 +247,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) } max_num = max_buf / sizeof(struct smb2_lock_element); - buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL); + buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL); if (!buf) { free_xid(xid); return -ENOMEM; diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 1a08a34838f..f1cefc9763e 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -67,27 +67,27 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid) * indexed by command in host byte order */ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = { - /* SMB2_NEGOTIATE */ __constant_cpu_to_le16(65), - /* SMB2_SESSION_SETUP */ __constant_cpu_to_le16(9), - /* SMB2_LOGOFF */ __constant_cpu_to_le16(4), - /* SMB2_TREE_CONNECT */ __constant_cpu_to_le16(16), - /* SMB2_TREE_DISCONNECT */ __constant_cpu_to_le16(4), - /* SMB2_CREATE */ __constant_cpu_to_le16(89), - /* SMB2_CLOSE */ __constant_cpu_to_le16(60), - /* SMB2_FLUSH */ __constant_cpu_to_le16(4), - /* SMB2_READ */ __constant_cpu_to_le16(17), - /* SMB2_WRITE */ __constant_cpu_to_le16(17), - /* SMB2_LOCK */ __constant_cpu_to_le16(4), - /* SMB2_IOCTL */ __constant_cpu_to_le16(49), + /* SMB2_NEGOTIATE */ cpu_to_le16(65), + /* SMB2_SESSION_SETUP */ cpu_to_le16(9), + /* SMB2_LOGOFF */ cpu_to_le16(4), + /* SMB2_TREE_CONNECT */ cpu_to_le16(16), + /* SMB2_TREE_DISCONNECT */ cpu_to_le16(4), + /* SMB2_CREATE */ cpu_to_le16(89), + /* SMB2_CLOSE */ cpu_to_le16(60), + /* SMB2_FLUSH */ cpu_to_le16(4), + /* SMB2_READ */ cpu_to_le16(17), + /* SMB2_WRITE */ cpu_to_le16(17), + /* SMB2_LOCK */ cpu_to_le16(4), + /* SMB2_IOCTL */ cpu_to_le16(49), /* BB CHECK this ... not listed in documentation */ - /* SMB2_CANCEL */ __constant_cpu_to_le16(0), - /* SMB2_ECHO */ __constant_cpu_to_le16(4), - /* SMB2_QUERY_DIRECTORY */ __constant_cpu_to_le16(9), - /* SMB2_CHANGE_NOTIFY */ __constant_cpu_to_le16(9), - /* SMB2_QUERY_INFO */ __constant_cpu_to_le16(9), - /* SMB2_SET_INFO */ __constant_cpu_to_le16(2), + /* SMB2_CANCEL */ cpu_to_le16(0), + /* SMB2_ECHO */ cpu_to_le16(4), + /* SMB2_QUERY_DIRECTORY */ cpu_to_le16(9), + /* SMB2_CHANGE_NOTIFY */ cpu_to_le16(9), + /* SMB2_QUERY_INFO */ cpu_to_le16(9), + /* SMB2_SET_INFO */ cpu_to_le16(2), /* BB FIXME can also be 44 for lease break */ - /* SMB2_OPLOCK_BREAK */ __constant_cpu_to_le16(24) + /* SMB2_OPLOCK_BREAK */ cpu_to_le16(24) }; int diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c5f521bcdee..93fd0586f9e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -600,7 +600,7 @@ smb2_clone_range(const unsigned int xid, goto cchunk_out; /* For now array only one chunk long, will make more flexible later */ - pcchunk->ChunkCount = __constant_cpu_to_le32(1); + pcchunk->ChunkCount = cpu_to_le32(1); pcchunk->Reserved = 0; pcchunk->Reserved2 = 0; @@ -1102,6 +1102,64 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, return rc; } +static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, + loff_t off, loff_t len, bool keep_size) +{ + struct inode *inode; + struct cifsInodeInfo *cifsi; + struct cifsFileInfo *cfile = file->private_data; + long rc = -EOPNOTSUPP; + unsigned int xid; + + xid = get_xid(); + + inode = cfile->dentry->d_inode; + cifsi = CIFS_I(inode); + + /* if file not oplocked can't be sure whether asking to extend size */ + if (!CIFS_CACHE_READ(cifsi)) + if (keep_size == false) + return -EOPNOTSUPP; + + /* + * Files are non-sparse by default so falloc may be a no-op + * Must check if file sparse. If not sparse, and not extending + * then no need to do anything since file already allocated + */ + if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0) { + if (keep_size == true) + return 0; + /* check if extending file */ + else if (i_size_read(inode) >= off + len) + /* not extending file and already not sparse */ + return 0; + /* BB: in future add else clause to extend file */ + else + return -EOPNOTSUPP; + } + + if ((keep_size == true) || (i_size_read(inode) >= off + len)) { + /* + * Check if falloc starts within first few pages of file + * and ends within a few pages of the end of file to + * ensure that most of file is being forced to be + * fallocated now. If so then setting whole file sparse + * ie potentially making a few extra pages at the beginning + * or end of the file non-sparse via set_sparse is harmless. + */ + if ((off > 8192) || (off + len + 8192 < i_size_read(inode))) + return -EOPNOTSUPP; + + rc = smb2_set_sparse(xid, tcon, cfile, inode, false); + } + /* BB: else ... in future add code to extend file and set sparse */ + + + free_xid(xid); + return rc; +} + + static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode, loff_t off, loff_t len) { @@ -1112,7 +1170,10 @@ static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode, if (mode & FALLOC_FL_KEEP_SIZE) return smb3_zero_range(file, tcon, off, len, true); return smb3_zero_range(file, tcon, off, len, false); - } + } else if (mode == FALLOC_FL_KEEP_SIZE) + return smb3_simple_falloc(file, tcon, off, len, true); + else if (mode == 0) + return smb3_simple_falloc(file, tcon, off, len, false); return -EOPNOTSUPP; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 8f1672bb82d..3417340bf89 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -431,8 +431,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) if (rc) goto neg_exit; if (blob_length) - rc = decode_neg_token_init(security_blob, blob_length, - &server->sec_type); + rc = decode_negTokenInit(security_blob, blob_length, server); if (rc == 1) rc = 0; else if (rc == 0) { @@ -1359,7 +1358,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, char *ret_data = NULL; fsctl_input.CompressionState = - __constant_cpu_to_le16(COMPRESSION_FORMAT_DEFAULT); + cpu_to_le16(COMPRESSION_FORMAT_DEFAULT); rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, FSCTL_SET_COMPRESSION, true /* is_fsctl */, diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index e3188abdafd..ce858477002 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -85,7 +85,7 @@ /* BB FIXME - analyze following length BB */ #define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */ -#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe) +#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) /* * SMB2 Header Definition @@ -96,7 +96,7 @@ * */ -#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64) +#define SMB2_HEADER_STRUCTURE_SIZE cpu_to_le16(64) struct smb2_hdr { __be32 smb2_buf_length; /* big endian on wire */ @@ -137,16 +137,16 @@ struct smb2_transform_hdr { } __packed; /* Encryption Algorithms */ -#define SMB2_ENCRYPTION_AES128_CCM __constant_cpu_to_le16(0x0001) +#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001) /* * SMB2 flag definitions */ -#define SMB2_FLAGS_SERVER_TO_REDIR __constant_cpu_to_le32(0x00000001) -#define SMB2_FLAGS_ASYNC_COMMAND __constant_cpu_to_le32(0x00000002) -#define SMB2_FLAGS_RELATED_OPERATIONS __constant_cpu_to_le32(0x00000004) -#define SMB2_FLAGS_SIGNED __constant_cpu_to_le32(0x00000008) -#define SMB2_FLAGS_DFS_OPERATIONS __constant_cpu_to_le32(0x10000000) +#define SMB2_FLAGS_SERVER_TO_REDIR cpu_to_le32(0x00000001) +#define SMB2_FLAGS_ASYNC_COMMAND cpu_to_le32(0x00000002) +#define SMB2_FLAGS_RELATED_OPERATIONS cpu_to_le32(0x00000004) +#define SMB2_FLAGS_SIGNED cpu_to_le32(0x00000008) +#define SMB2_FLAGS_DFS_OPERATIONS cpu_to_le32(0x10000000) /* * Definitions for SMB2 Protocol Data Units (network frames) @@ -157,7 +157,7 @@ struct smb2_transform_hdr { * */ -#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9) +#define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9) struct smb2_err_rsp { struct smb2_hdr hdr; @@ -502,12 +502,12 @@ struct create_context { #define SMB2_LEASE_HANDLE_CACHING_HE 0x02 #define SMB2_LEASE_WRITE_CACHING_HE 0x04 -#define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00) -#define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01) -#define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02) -#define SMB2_LEASE_WRITE_CACHING __constant_cpu_to_le32(0x04) +#define SMB2_LEASE_NONE cpu_to_le32(0x00) +#define SMB2_LEASE_READ_CACHING cpu_to_le32(0x01) +#define SMB2_LEASE_HANDLE_CACHING cpu_to_le32(0x02) +#define SMB2_LEASE_WRITE_CACHING cpu_to_le32(0x04) -#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02) +#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS cpu_to_le32(0x02) #define SMB2_LEASE_KEY_SIZE 16 @@ -836,6 +836,25 @@ struct smb2_query_directory_rsp { #define SMB2_O_INFO_SECURITY 0x03 #define SMB2_O_INFO_QUOTA 0x04 +/* Security info type additionalinfo flags. See MS-SMB2 (2.2.37) or MS-DTYP */ +#define OWNER_SECINFO 0x00000001 +#define GROUP_SECINFO 0x00000002 +#define DACL_SECINFO 0x00000004 +#define SACL_SECINFO 0x00000008 +#define LABEL_SECINFO 0x00000010 +#define ATTRIBUTE_SECINFO 0x00000020 +#define SCOPE_SECINFO 0x00000040 +#define BACKUP_SECINFO 0x00010000 +#define UNPROTECTED_SACL_SECINFO 0x10000000 +#define UNPROTECTED_DACL_SECINFO 0x20000000 +#define PROTECTED_SACL_SECINFO 0x40000000 +#define PROTECTED_DACL_SECINFO 0x80000000 + +/* Flags used for FileFullEAinfo */ +#define SL_RESTART_SCAN 0x00000001 +#define SL_RETURN_SINGLE_ENTRY 0x00000002 +#define SL_INDEX_SPECIFIED 0x00000004 + struct smb2_query_info_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 41 */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 9d087f4e7d4..126f46b887c 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -99,9 +99,9 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) something is wrong, unless it is quite a slow link or server */ if ((now - midEntry->when_alloc) > HZ) { if ((cifsFYI & CIFS_TIMER) && (midEntry->command != command)) { - printk(KERN_DEBUG " CIFS slow rsp: cmd %d mid %llu", + pr_debug(" CIFS slow rsp: cmd %d mid %llu", midEntry->command, midEntry->mid); - printk(" A: 0x%lx S: 0x%lx R: 0x%lx\n", + pr_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n", now - midEntry->when_alloc, now - midEntry->when_sent, now - midEntry->when_received); diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 278f8fdeb9e..46ee6f23898 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -92,7 +92,7 @@ static void coda_flag_children(struct dentry *parent, int flag) struct dentry *de; spin_lock(&parent->d_lock); - list_for_each_entry(de, &parent->d_subdirs, d_u.d_child) { + list_for_each_entry(de, &parent->d_subdirs, d_child) { /* don't know what to do with negative dentries */ if (de->d_inode ) coda_flag_inode(de->d_inode, flag); diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index 1326d38960d..f1714cfb589 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -40,12 +40,6 @@ int coda_iscontrol(const char *name, size_t length) (strncmp(name, CODA_CONTROL, CODA_CONTROLLEN) == 0)); } -/* recognize /coda inode */ -int coda_isroot(struct inode *i) -{ - return ( i->i_sb->s_root->d_inode == i ); -} - unsigned short coda_flags_to_cflags(unsigned short flags) { unsigned short coda_flags = 0; diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index d42b725b1d2..d6f7a76a1f5 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -52,7 +52,6 @@ int coda_setattr(struct dentry *, struct iattr *); /* this file: heloers */ char *coda_f2s(struct CodaFid *f); -int coda_isroot(struct inode *i); int coda_iscontrol(const char *name, size_t length); void coda_vattr_to_iattr(struct inode *, struct coda_vattr *); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 9c3dedc000d..7ff025966e4 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -107,7 +107,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsig } /* control object, create inode on the fly */ - if (coda_isroot(dir) && coda_iscontrol(name, length)) { + if (is_root_inode(dir) && coda_iscontrol(name, length)) { inode = coda_cnode_makectl(sb); type = CODA_NOCACHE; } else { @@ -195,7 +195,7 @@ static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, bool struct CodaFid newfid; struct coda_vattr attrs; - if (coda_isroot(dir) && coda_iscontrol(name, length)) + if (is_root_inode(dir) && coda_iscontrol(name, length)) return -EPERM; error = venus_create(dir->i_sb, coda_i2f(dir), name, length, @@ -227,7 +227,7 @@ static int coda_mkdir(struct inode *dir, struct dentry *de, umode_t mode) int error; struct CodaFid newfid; - if (coda_isroot(dir) && coda_iscontrol(name, len)) + if (is_root_inode(dir) && coda_iscontrol(name, len)) return -EPERM; attrs.va_mode = mode; @@ -261,7 +261,7 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode, int len = de->d_name.len; int error; - if (coda_isroot(dir_inode) && coda_iscontrol(name, len)) + if (is_root_inode(dir_inode) && coda_iscontrol(name, len)) return -EPERM; error = venus_link(dir_inode->i_sb, coda_i2f(inode), @@ -287,7 +287,7 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de, int symlen; int error; - if (coda_isroot(dir_inode) && coda_iscontrol(name, len)) + if (is_root_inode(dir_inode) && coda_iscontrol(name, len)) return -EPERM; symlen = strlen(symname); @@ -507,7 +507,7 @@ static int coda_dentry_revalidate(struct dentry *de, unsigned int flags) return -ECHILD; inode = de->d_inode; - if (!inode || coda_isroot(inode)) + if (!inode || is_root_inode(inode)) goto out; if (is_bad_inode(inode)) goto bad; diff --git a/fs/compat.c b/fs/compat.c index b13df99f353..6fd272d455e 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -847,10 +847,12 @@ struct compat_readdir_callback { int result; }; -static int compat_fillonedir(void *__buf, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type) +static int compat_fillonedir(struct dir_context *ctx, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int d_type) { - struct compat_readdir_callback *buf = __buf; + struct compat_readdir_callback *buf = + container_of(ctx, struct compat_readdir_callback, ctx); struct compat_old_linux_dirent __user *dirent; compat_ulong_t d_ino; @@ -915,11 +917,12 @@ struct compat_getdents_callback { int error; }; -static int compat_filldir(void *__buf, const char *name, int namlen, +static int compat_filldir(struct dir_context *ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct compat_linux_dirent __user * dirent; - struct compat_getdents_callback *buf = __buf; + struct compat_getdents_callback *buf = + container_of(ctx, struct compat_getdents_callback, ctx); compat_ulong_t d_ino; int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + namlen + 2, sizeof(compat_long_t)); @@ -1001,11 +1004,13 @@ struct compat_getdents_callback64 { int error; }; -static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset, - u64 ino, unsigned int d_type) +static int compat_filldir64(struct dir_context *ctx, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int d_type) { struct linux_dirent64 __user *dirent; - struct compat_getdents_callback64 *buf = __buf; + struct compat_getdents_callback64 *buf = + container_of(ctx, struct compat_getdents_callback64, ctx); int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, sizeof(u64)); u64 off; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 668dcabc569..c9c298bd305 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -386,7 +386,7 @@ static void remove_dir(struct dentry * d) if (d->d_inode) simple_rmdir(parent->d_inode,d); - pr_debug(" o %s removing done (%d)\n",d->d_name.name, d_count(d)); + pr_debug(" o %pd removing done (%d)\n", d, d_count(d)); dput(parent); } diff --git a/fs/dcache.c b/fs/dcache.c index 3ffef7f4e5c..e368d4f412f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -44,7 +44,7 @@ /* * Usage: * dcache->d_inode->i_lock protects: - * - i_dentry, d_alias, d_inode of aliases + * - i_dentry, d_u.d_alias, d_inode of aliases * dcache_hash_bucket lock protects: * - the dcache hash table * s_anon bl list spinlock protects: @@ -59,7 +59,7 @@ * - d_unhashed() * - d_parent and d_subdirs * - childrens' d_child and d_parent - * - d_alias, d_inode + * - d_u.d_alias, d_inode * * Ordering: * dentry->d_inode->i_lock @@ -252,14 +252,12 @@ static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); - WARN_ON(!hlist_unhashed(&dentry->d_alias)); kmem_cache_free(dentry_cache, dentry); } static void __d_free_external(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); - WARN_ON(!hlist_unhashed(&dentry->d_alias)); kfree(external_name(dentry)); kmem_cache_free(dentry_cache, dentry); } @@ -271,6 +269,7 @@ static inline int dname_external(const struct dentry *dentry) static void dentry_free(struct dentry *dentry) { + WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); if (unlikely(dname_external(dentry))) { struct external_name *p = external_name(dentry); if (likely(atomic_dec_and_test(&p->u.count))) { @@ -311,7 +310,7 @@ static void dentry_iput(struct dentry * dentry) struct inode *inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; - hlist_del_init(&dentry->d_alias); + hlist_del_init(&dentry->d_u.d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -336,7 +335,7 @@ static void dentry_unlink_inode(struct dentry * dentry) struct inode *inode = dentry->d_inode; __d_clear_type(dentry); dentry->d_inode = NULL; - hlist_del_init(&dentry->d_alias); + hlist_del_init(&dentry->d_u.d_alias); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); @@ -496,7 +495,7 @@ static void __dentry_kill(struct dentry *dentry) } /* if it was on the hash then remove it */ __d_drop(dentry); - list_del(&dentry->d_u.d_child); + __list_del_entry(&dentry->d_child); /* * Inform d_walk() that we are no longer attached to the * dentry tree @@ -722,7 +721,7 @@ static struct dentry *__d_find_alias(struct inode *inode) again: discon_alias = NULL; - hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && @@ -772,12 +771,13 @@ void d_prune_aliases(struct inode *inode) struct dentry *dentry; restart: spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_lockref.count) { struct dentry *parent = lock_parent(dentry); if (likely(!dentry->d_lockref.count)) { __dentry_kill(dentry); + dput(parent); goto restart; } if (parent) @@ -1050,7 +1050,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); @@ -1082,33 +1082,31 @@ resume: /* * All done at this level ... ascend and resume the search. */ + rcu_read_lock(); +ascend: if (this_parent != parent) { struct dentry *child = this_parent; this_parent = child->d_parent; - rcu_read_lock(); spin_unlock(&child->d_lock); spin_lock(&this_parent->d_lock); - /* - * might go back up the wrong parent if we have had a rename - * or deletion - */ - if (this_parent != child->d_parent || - (child->d_flags & DCACHE_DENTRY_KILLED) || - need_seqretry(&rename_lock, seq)) { - spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); + /* might go back up the wrong parent if we have had a rename. */ + if (need_seqretry(&rename_lock, seq)) goto rename_retry; + next = child->d_child.next; + while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) { + if (next == &this_parent->d_subdirs) + goto ascend; + child = list_entry(next, struct dentry, d_child); + next = next->next; } rcu_read_unlock(); - next = child->d_u.d_child.next; goto resume; } - if (need_seqretry(&rename_lock, seq)) { - spin_unlock(&this_parent->d_lock); + if (need_seqretry(&rename_lock, seq)) goto rename_retry; - } + rcu_read_unlock(); if (finish) finish(data); @@ -1118,6 +1116,9 @@ out_unlock: return; rename_retry: + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); + BUG_ON(seq & 1); if (!retry) return; seq = 1; @@ -1454,8 +1455,8 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); - INIT_HLIST_NODE(&dentry->d_alias); - INIT_LIST_HEAD(&dentry->d_u.d_child); + INIT_HLIST_NODE(&dentry->d_u.d_alias); + INIT_LIST_HEAD(&dentry->d_child); d_set_d_op(dentry, dentry->d_sb->s_d_op); this_cpu_inc(nr_dentry); @@ -1485,7 +1486,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) */ __dget_dlock(parent); dentry->d_parent = parent; - list_add(&dentry->d_u.d_child, &parent->d_subdirs); + list_add(&dentry->d_child, &parent->d_subdirs); spin_unlock(&parent->d_lock); return dentry; @@ -1578,7 +1579,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) spin_lock(&dentry->d_lock); __d_set_type(dentry, add_flags); if (inode) - hlist_add_head(&dentry->d_alias, &inode->i_dentry); + hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); dentry->d_inode = inode; dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); @@ -1602,7 +1603,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { - BUG_ON(!hlist_unhashed(&entry->d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); if (inode) spin_lock(&inode->i_lock); __d_instantiate(entry, inode); @@ -1641,7 +1642,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, return NULL; } - hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { /* * Don't need alias->d_lock here, because aliases with * d_parent == entry->d_parent are not subject to name or @@ -1667,7 +1668,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) { struct dentry *result; - BUG_ON(!hlist_unhashed(&entry->d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); if (inode) spin_lock(&inode->i_lock); @@ -1698,7 +1699,7 @@ EXPORT_SYMBOL(d_instantiate_unique); */ int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode) { - BUG_ON(!hlist_unhashed(&entry->d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); spin_lock(&inode->i_lock); if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) { @@ -1737,7 +1738,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode) if (hlist_empty(&inode->i_dentry)) return NULL; - alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); __dget(alias); return alias; } @@ -1799,7 +1800,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) spin_lock(&tmp->d_lock); tmp->d_inode = inode; tmp->d_flags |= add_flags; - hlist_add_head(&tmp->d_alias, &inode->i_dentry); + hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); hlist_bl_unlock(&tmp->d_sb->s_anon); @@ -1888,51 +1889,19 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * if not go ahead and create it now. */ found = d_hash_and_lookup(dentry->d_parent, name); - if (unlikely(IS_ERR(found))) - goto err_out; if (!found) { new = d_alloc(dentry->d_parent, name); if (!new) { found = ERR_PTR(-ENOMEM); - goto err_out; - } - - found = d_splice_alias(inode, new); - if (found) { - dput(new); - return found; - } - return new; - } - - /* - * If a matching dentry exists, and it's not negative use it. - * - * Decrement the reference count to balance the iget() done - * earlier on. - */ - if (found->d_inode) { - if (unlikely(found->d_inode != inode)) { - /* This can't happen because bad inodes are unhashed. */ - BUG_ON(!is_bad_inode(inode)); - BUG_ON(!is_bad_inode(found->d_inode)); + } else { + found = d_splice_alias(inode, new); + if (found) { + dput(new); + return found; + } + return new; } - iput(inode); - return found; } - - /* - * Negative dentry: instantiate it unless the inode is a directory and - * already has a dentry. - */ - new = d_splice_alias(inode, found); - if (new) { - dput(found); - found = new; - } - return found; - -err_out: iput(inode); return found; } @@ -2234,7 +2203,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) struct dentry *child; spin_lock(&dparent->d_lock); - list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &dparent->d_subdirs, d_child) { if (dentry == child) { spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); __dget_dlock(dentry); @@ -2392,6 +2361,8 @@ static void swap_names(struct dentry *dentry, struct dentry *target) */ unsigned int i; BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); + kmemcheck_mark_initialized(dentry->d_iname, DNAME_INLINE_LEN); + kmemcheck_mark_initialized(target->d_iname, DNAME_INLINE_LEN); for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { swap(((long *) &dentry->d_iname)[i], ((long *) &target->d_iname)[i]); @@ -2525,13 +2496,13 @@ static void __d_move(struct dentry *dentry, struct dentry *target, /* splicing a tree */ dentry->d_parent = target->d_parent; target->d_parent = target; - list_del_init(&target->d_u.d_child); - list_move(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + list_del_init(&target->d_child); + list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); } else { /* swapping two dentries */ swap(dentry->d_parent, target->d_parent); - list_move(&target->d_u.d_child, &target->d_parent->d_subdirs); - list_move(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + list_move(&target->d_child, &target->d_parent->d_subdirs); + list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); if (exchange) fsnotify_d_move(target); fsnotify_d_move(dentry); @@ -2607,11 +2578,11 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... */ -static struct dentry *__d_unalias(struct inode *inode, +static int __d_unalias(struct inode *inode, struct dentry *dentry, struct dentry *alias) { struct mutex *m1 = NULL, *m2 = NULL; - struct dentry *ret = ERR_PTR(-EBUSY); + int ret = -EBUSY; /* If alias and dentry share a parent, then no extra locks required */ if (alias->d_parent == dentry->d_parent) @@ -2626,7 +2597,7 @@ static struct dentry *__d_unalias(struct inode *inode, m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: __d_move(alias, dentry, false); - ret = alias; + ret = 0; out_err: spin_unlock(&inode->i_lock); if (m2) @@ -2661,130 +2632,57 @@ out_err: */ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) { - struct dentry *new = NULL; - if (IS_ERR(inode)) return ERR_CAST(inode); - if (inode && S_ISDIR(inode->i_mode)) { - spin_lock(&inode->i_lock); - new = __d_find_any_alias(inode); - if (new) { - if (!IS_ROOT(new)) { - spin_unlock(&inode->i_lock); - dput(new); - iput(inode); - return ERR_PTR(-EIO); - } - if (d_ancestor(new, dentry)) { - spin_unlock(&inode->i_lock); - dput(new); - iput(inode); - return ERR_PTR(-EIO); - } - write_seqlock(&rename_lock); - __d_move(new, dentry, false); - write_sequnlock(&rename_lock); - spin_unlock(&inode->i_lock); - security_d_instantiate(new, inode); - iput(inode); - } else { - /* already taking inode->i_lock, so d_add() by hand */ - __d_instantiate(dentry, inode); - spin_unlock(&inode->i_lock); - security_d_instantiate(dentry, inode); - d_rehash(dentry); - } - } else { - d_instantiate(dentry, inode); - if (d_unhashed(dentry)) - d_rehash(dentry); - } - return new; -} -EXPORT_SYMBOL(d_splice_alias); - -/** - * d_materialise_unique - introduce an inode into the tree - * @dentry: candidate dentry - * @inode: inode to bind to the dentry, to which aliases may be attached - * - * Introduces an dentry into the tree, substituting an extant disconnected - * root directory alias in its place if there is one. Caller must hold the - * i_mutex of the parent directory. - */ -struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) -{ - struct dentry *actual; - BUG_ON(!d_unhashed(dentry)); if (!inode) { - actual = dentry; __d_instantiate(dentry, NULL); - d_rehash(actual); - goto out_nolock; + goto out; } - spin_lock(&inode->i_lock); - if (S_ISDIR(inode->i_mode)) { - struct dentry *alias; - - /* Does an aliased dentry already exist? */ - alias = __d_find_alias(inode); - if (alias) { - actual = alias; + struct dentry *new = __d_find_any_alias(inode); + if (unlikely(new)) { write_seqlock(&rename_lock); - - if (d_ancestor(alias, dentry)) { - /* Check for loops */ - actual = ERR_PTR(-ELOOP); + if (unlikely(d_ancestor(new, dentry))) { + write_sequnlock(&rename_lock); spin_unlock(&inode->i_lock); - } else if (IS_ROOT(alias)) { - /* Is this an anonymous mountpoint that we - * could splice into our tree? */ - __d_move(alias, dentry, false); + dput(new); + new = ERR_PTR(-ELOOP); + pr_warn_ratelimited( + "VFS: Lookup of '%s' in %s %s" + " would have caused loop\n", + dentry->d_name.name, + inode->i_sb->s_type->name, + inode->i_sb->s_id); + } else if (!IS_ROOT(new)) { + int err = __d_unalias(inode, dentry, new); write_sequnlock(&rename_lock); - goto found; + if (err) { + dput(new); + new = ERR_PTR(err); + } } else { - /* Nope, but we must(!) avoid directory - * aliasing. This drops inode->i_lock */ - actual = __d_unalias(inode, dentry, alias); - } - write_sequnlock(&rename_lock); - if (IS_ERR(actual)) { - if (PTR_ERR(actual) == -ELOOP) - pr_warn_ratelimited( - "VFS: Lookup of '%s' in %s %s" - " would have caused loop\n", - dentry->d_name.name, - inode->i_sb->s_type->name, - inode->i_sb->s_id); - dput(alias); + __d_move(new, dentry, false); + write_sequnlock(&rename_lock); + spin_unlock(&inode->i_lock); + security_d_instantiate(new, inode); } - goto out_nolock; + iput(inode); + return new; } } - - /* Add a unique reference */ - actual = __d_instantiate_unique(dentry, inode); - if (!actual) - actual = dentry; - - d_rehash(actual); -found: + /* already taking inode->i_lock, so d_add() by hand */ + __d_instantiate(dentry, inode); spin_unlock(&inode->i_lock); -out_nolock: - if (actual == dentry) { - security_d_instantiate(dentry, inode); - return NULL; - } - - iput(inode); - return actual; +out: + security_d_instantiate(dentry, inode); + d_rehash(dentry); + return NULL; } -EXPORT_SYMBOL_GPL(d_materialise_unique); +EXPORT_SYMBOL(d_splice_alias); static int prepend(char **buffer, int *buflen, const char *str, int namelen) { @@ -3320,7 +3218,7 @@ void d_tmpfile(struct dentry *dentry, struct inode *inode) { inode_dec_link_count(inode); BUG_ON(dentry->d_name.name != dentry->d_iname || - !hlist_unhashed(&dentry->d_alias) || + !hlist_unhashed(&dentry->d_u.d_alias) || !d_unlinked(dentry)); spin_lock(&dentry->d_parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 76c08c2beb2..8e0f2f41018 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -692,18 +692,19 @@ EXPORT_SYMBOL_GPL(debugfs_create_u32_array); * because some peripherals have several blocks of identical registers, * for example configuration of dma channels */ -int debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs, - int nregs, void __iomem *base, char *prefix) +void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs, + int nregs, void __iomem *base, char *prefix) { - int i, ret = 0; + int i; for (i = 0; i < nregs; i++, regs++) { if (prefix) - ret += seq_printf(s, "%s", prefix); - ret += seq_printf(s, "%s = 0x%08x\n", regs->name, - readl(base + regs->offset)); + seq_printf(s, "%s", prefix); + seq_printf(s, "%s = 0x%08x\n", regs->name, + readl(base + regs->offset)); + if (seq_has_overflowed(s)) + break; } - return ret; } EXPORT_SYMBOL_GPL(debugfs_print_regs32); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 1e3b99d3db0..05f2960ed7c 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -553,7 +553,7 @@ void debugfs_remove_recursive(struct dentry *dentry) * use the d_u.d_child as the rcu head and corrupt this list. */ spin_lock(&parent->d_lock); - list_for_each_entry(child, &parent->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &parent->d_subdirs, d_child) { if (!debugfs_positive(child)) continue; diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 1323c568e36..eea64912c9c 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -48,8 +48,8 @@ static char *print_lockmode(int mode) } } -static int print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb, - struct dlm_rsb *res) +static void print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb, + struct dlm_rsb *res) { seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode)); @@ -68,21 +68,17 @@ static int print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb, if (lkb->lkb_wait_type) seq_printf(s, " wait_type: %d", lkb->lkb_wait_type); - return seq_puts(s, "\n"); + seq_puts(s, "\n"); } -static int print_format1(struct dlm_rsb *res, struct seq_file *s) +static void print_format1(struct dlm_rsb *res, struct seq_file *s) { struct dlm_lkb *lkb; int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list; - int rv; lock_rsb(res); - rv = seq_printf(s, "\nResource %p Name (len=%d) \"", - res, res->res_length); - if (rv) - goto out; + seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length); for (i = 0; i < res->res_length; i++) { if (isprint(res->res_name[i])) @@ -92,17 +88,16 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s) } if (res->res_nodeid > 0) - rv = seq_printf(s, "\"\nLocal Copy, Master is node %d\n", - res->res_nodeid); + seq_printf(s, "\"\nLocal Copy, Master is node %d\n", + res->res_nodeid); else if (res->res_nodeid == 0) - rv = seq_puts(s, "\"\nMaster Copy\n"); + seq_puts(s, "\"\nMaster Copy\n"); else if (res->res_nodeid == -1) - rv = seq_printf(s, "\"\nLooking up master (lkid %x)\n", - res->res_first_lkid); + seq_printf(s, "\"\nLooking up master (lkid %x)\n", + res->res_first_lkid); else - rv = seq_printf(s, "\"\nInvalid master %d\n", - res->res_nodeid); - if (rv) + seq_printf(s, "\"\nInvalid master %d\n", res->res_nodeid); + if (seq_has_overflowed(s)) goto out; /* Print the LVB: */ @@ -116,8 +111,8 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s) } if (rsb_flag(res, RSB_VALNOTVALID)) seq_puts(s, " (INVALID)"); - rv = seq_puts(s, "\n"); - if (rv) + seq_puts(s, "\n"); + if (seq_has_overflowed(s)) goto out; } @@ -125,32 +120,30 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s) recover_list = !list_empty(&res->res_recover_list); if (root_list || recover_list) { - rv = seq_printf(s, "Recovery: root %d recover %d flags %lx " - "count %d\n", root_list, recover_list, - res->res_flags, res->res_recover_locks_count); - if (rv) - goto out; + seq_printf(s, "Recovery: root %d recover %d flags %lx count %d\n", + root_list, recover_list, + res->res_flags, res->res_recover_locks_count); } /* Print the locks attached to this resource */ seq_puts(s, "Granted Queue\n"); list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) { - rv = print_format1_lock(s, lkb, res); - if (rv) + print_format1_lock(s, lkb, res); + if (seq_has_overflowed(s)) goto out; } seq_puts(s, "Conversion Queue\n"); list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) { - rv = print_format1_lock(s, lkb, res); - if (rv) + print_format1_lock(s, lkb, res); + if (seq_has_overflowed(s)) goto out; } seq_puts(s, "Waiting Queue\n"); list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) { - rv = print_format1_lock(s, lkb, res); - if (rv) + print_format1_lock(s, lkb, res); + if (seq_has_overflowed(s)) goto out; } @@ -159,23 +152,23 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s) seq_puts(s, "Lookup Queue\n"); list_for_each_entry(lkb, &res->res_lookup, lkb_rsb_lookup) { - rv = seq_printf(s, "%08x %s", lkb->lkb_id, - print_lockmode(lkb->lkb_rqmode)); + seq_printf(s, "%08x %s", + lkb->lkb_id, print_lockmode(lkb->lkb_rqmode)); if (lkb->lkb_wait_type) seq_printf(s, " wait_type: %d", lkb->lkb_wait_type); - rv = seq_puts(s, "\n"); + seq_puts(s, "\n"); + if (seq_has_overflowed(s)) + goto out; } out: unlock_rsb(res); - return rv; } -static int print_format2_lock(struct seq_file *s, struct dlm_lkb *lkb, - struct dlm_rsb *r) +static void print_format2_lock(struct seq_file *s, struct dlm_lkb *lkb, + struct dlm_rsb *r) { u64 xid = 0; u64 us; - int rv; if (lkb->lkb_flags & DLM_IFL_USER) { if (lkb->lkb_ua) @@ -188,103 +181,97 @@ static int print_format2_lock(struct seq_file *s, struct dlm_lkb *lkb, /* id nodeid remid pid xid exflags flags sts grmode rqmode time_us r_nodeid r_len r_name */ - rv = seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %llu %u %d \"%s\"\n", - lkb->lkb_id, - lkb->lkb_nodeid, - lkb->lkb_remid, - lkb->lkb_ownpid, - (unsigned long long)xid, - lkb->lkb_exflags, - lkb->lkb_flags, - lkb->lkb_status, - lkb->lkb_grmode, - lkb->lkb_rqmode, - (unsigned long long)us, - r->res_nodeid, - r->res_length, - r->res_name); - return rv; + seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %llu %u %d \"%s\"\n", + lkb->lkb_id, + lkb->lkb_nodeid, + lkb->lkb_remid, + lkb->lkb_ownpid, + (unsigned long long)xid, + lkb->lkb_exflags, + lkb->lkb_flags, + lkb->lkb_status, + lkb->lkb_grmode, + lkb->lkb_rqmode, + (unsigned long long)us, + r->res_nodeid, + r->res_length, + r->res_name); } -static int print_format2(struct dlm_rsb *r, struct seq_file *s) +static void print_format2(struct dlm_rsb *r, struct seq_file *s) { struct dlm_lkb *lkb; - int rv = 0; lock_rsb(r); list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { - rv = print_format2_lock(s, lkb, r); - if (rv) + print_format2_lock(s, lkb, r); + if (seq_has_overflowed(s)) goto out; } list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) { - rv = print_format2_lock(s, lkb, r); - if (rv) + print_format2_lock(s, lkb, r); + if (seq_has_overflowed(s)) goto out; } list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) { - rv = print_format2_lock(s, lkb, r); - if (rv) + print_format2_lock(s, lkb, r); + if (seq_has_overflowed(s)) goto out; } out: unlock_rsb(r); - return rv; } -static int print_format3_lock(struct seq_file *s, struct dlm_lkb *lkb, +static void print_format3_lock(struct seq_file *s, struct dlm_lkb *lkb, int rsb_lookup) { u64 xid = 0; - int rv; if (lkb->lkb_flags & DLM_IFL_USER) { if (lkb->lkb_ua) xid = lkb->lkb_ua->xid; } - rv = seq_printf(s, "lkb %x %d %x %u %llu %x %x %d %d %d %d %d %d %u %llu %llu\n", - lkb->lkb_id, - lkb->lkb_nodeid, - lkb->lkb_remid, - lkb->lkb_ownpid, - (unsigned long long)xid, - lkb->lkb_exflags, - lkb->lkb_flags, - lkb->lkb_status, - lkb->lkb_grmode, - lkb->lkb_rqmode, - lkb->lkb_last_bast.mode, - rsb_lookup, - lkb->lkb_wait_type, - lkb->lkb_lvbseq, - (unsigned long long)ktime_to_ns(lkb->lkb_timestamp), - (unsigned long long)ktime_to_ns(lkb->lkb_last_bast_time)); - return rv; + seq_printf(s, "lkb %x %d %x %u %llu %x %x %d %d %d %d %d %d %u %llu %llu\n", + lkb->lkb_id, + lkb->lkb_nodeid, + lkb->lkb_remid, + lkb->lkb_ownpid, + (unsigned long long)xid, + lkb->lkb_exflags, + lkb->lkb_flags, + lkb->lkb_status, + lkb->lkb_grmode, + lkb->lkb_rqmode, + lkb->lkb_last_bast.mode, + rsb_lookup, + lkb->lkb_wait_type, + lkb->lkb_lvbseq, + (unsigned long long)ktime_to_ns(lkb->lkb_timestamp), + (unsigned long long)ktime_to_ns(lkb->lkb_last_bast_time)); } -static int print_format3(struct dlm_rsb *r, struct seq_file *s) +static void print_format3(struct dlm_rsb *r, struct seq_file *s) { struct dlm_lkb *lkb; int i, lvblen = r->res_ls->ls_lvblen; int print_name = 1; - int rv; lock_rsb(r); - rv = seq_printf(s, "rsb %p %d %x %lx %d %d %u %d ", - r, - r->res_nodeid, - r->res_first_lkid, - r->res_flags, - !list_empty(&r->res_root_list), - !list_empty(&r->res_recover_list), - r->res_recover_locks_count, - r->res_length); - if (rv) + seq_printf(s, "rsb %p %d %x %lx %d %d %u %d ", + r, + r->res_nodeid, + r->res_first_lkid, + r->res_flags, + !list_empty(&r->res_root_list), + !list_empty(&r->res_recover_list), + r->res_recover_locks_count, + r->res_length); + if (seq_has_overflowed(s)) goto out; for (i = 0; i < r->res_length; i++) { @@ -292,7 +279,7 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s) print_name = 0; } - seq_printf(s, "%s", print_name ? "str " : "hex"); + seq_puts(s, print_name ? "str " : "hex"); for (i = 0; i < r->res_length; i++) { if (print_name) @@ -300,8 +287,8 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s) else seq_printf(s, " %02x", (unsigned char)r->res_name[i]); } - rv = seq_puts(s, "\n"); - if (rv) + seq_puts(s, "\n"); + if (seq_has_overflowed(s)) goto out; if (!r->res_lvbptr) @@ -311,65 +298,62 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s) for (i = 0; i < lvblen; i++) seq_printf(s, " %02x", (unsigned char)r->res_lvbptr[i]); - rv = seq_puts(s, "\n"); - if (rv) + seq_puts(s, "\n"); + if (seq_has_overflowed(s)) goto out; do_locks: list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { - rv = print_format3_lock(s, lkb, 0); - if (rv) + print_format3_lock(s, lkb, 0); + if (seq_has_overflowed(s)) goto out; } list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) { - rv = print_format3_lock(s, lkb, 0); - if (rv) + print_format3_lock(s, lkb, 0); + if (seq_has_overflowed(s)) goto out; } list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) { - rv = print_format3_lock(s, lkb, 0); - if (rv) + print_format3_lock(s, lkb, 0); + if (seq_has_overflowed(s)) goto out; } list_for_each_entry(lkb, &r->res_lookup, lkb_rsb_lookup) { - rv = print_format3_lock(s, lkb, 1); - if (rv) + print_format3_lock(s, lkb, 1); + if (seq_has_overflowed(s)) goto out; } out: unlock_rsb(r); - return rv; } -static int print_format4(struct dlm_rsb *r, struct seq_file *s) +static void print_format4(struct dlm_rsb *r, struct seq_file *s) { int our_nodeid = dlm_our_nodeid(); int print_name = 1; - int i, rv; + int i; lock_rsb(r); - rv = seq_printf(s, "rsb %p %d %d %d %d %lu %lx %d ", - r, - r->res_nodeid, - r->res_master_nodeid, - r->res_dir_nodeid, - our_nodeid, - r->res_toss_time, - r->res_flags, - r->res_length); - if (rv) - goto out; + seq_printf(s, "rsb %p %d %d %d %d %lu %lx %d ", + r, + r->res_nodeid, + r->res_master_nodeid, + r->res_dir_nodeid, + our_nodeid, + r->res_toss_time, + r->res_flags, + r->res_length); for (i = 0; i < r->res_length; i++) { if (!isascii(r->res_name[i]) || !isprint(r->res_name[i])) print_name = 0; } - seq_printf(s, "%s", print_name ? "str " : "hex"); + seq_puts(s, print_name ? "str " : "hex"); for (i = 0; i < r->res_length; i++) { if (print_name) @@ -377,10 +361,9 @@ static int print_format4(struct dlm_rsb *r, struct seq_file *s) else seq_printf(s, " %02x", (unsigned char)r->res_name[i]); } - rv = seq_puts(s, "\n"); - out: + seq_puts(s, "\n"); + unlock_rsb(r); - return rv; } struct rsbtbl_iter { @@ -390,47 +373,45 @@ struct rsbtbl_iter { int header; }; -/* seq_printf returns -1 if the buffer is full, and 0 otherwise. - If the buffer is full, seq_printf can be called again, but it - does nothing and just returns -1. So, the these printing routines - periodically check the return value to avoid wasting too much time - trying to print to a full buffer. */ +/* + * If the buffer is full, seq_printf can be called again, but it + * does nothing. So, the these printing routines periodically check + * seq_has_overflowed to avoid wasting too much time trying to print to + * a full buffer. + */ static int table_seq_show(struct seq_file *seq, void *iter_ptr) { struct rsbtbl_iter *ri = iter_ptr; - int rv = 0; switch (ri->format) { case 1: - rv = print_format1(ri->rsb, seq); + print_format1(ri->rsb, seq); break; case 2: if (ri->header) { - seq_printf(seq, "id nodeid remid pid xid exflags " - "flags sts grmode rqmode time_ms " - "r_nodeid r_len r_name\n"); + seq_puts(seq, "id nodeid remid pid xid exflags flags sts grmode rqmode time_ms r_nodeid r_len r_name\n"); ri->header = 0; } - rv = print_format2(ri->rsb, seq); + print_format2(ri->rsb, seq); break; case 3: if (ri->header) { - seq_printf(seq, "version rsb 1.1 lvb 1.1 lkb 1.1\n"); + seq_puts(seq, "version rsb 1.1 lvb 1.1 lkb 1.1\n"); ri->header = 0; } - rv = print_format3(ri->rsb, seq); + print_format3(ri->rsb, seq); break; case 4: if (ri->header) { - seq_printf(seq, "version 4 rsb 2\n"); + seq_puts(seq, "version 4 rsb 2\n"); ri->header = 0; } - rv = print_format4(ri->rsb, seq); + print_format4(ri->rsb, seq); break; } - return rv; + return 0; } static const struct seq_operations format1_seq_ops; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 83f3d552030..35502d4046f 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -5886,6 +5886,78 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, return error; } +/* + * The caller asks for an orphan lock on a given resource with a given mode. + * If a matching lock exists, it's moved to the owner's list of locks and + * the lkid is returned. + */ + +int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, + int mode, uint32_t flags, void *name, unsigned int namelen, + unsigned long timeout_cs, uint32_t *lkid) +{ + struct dlm_lkb *lkb; + struct dlm_user_args *ua; + int found_other_mode = 0; + int found = 0; + int rv = 0; + + mutex_lock(&ls->ls_orphans_mutex); + list_for_each_entry(lkb, &ls->ls_orphans, lkb_ownqueue) { + if (lkb->lkb_resource->res_length != namelen) + continue; + if (memcmp(lkb->lkb_resource->res_name, name, namelen)) + continue; + if (lkb->lkb_grmode != mode) { + found_other_mode = 1; + continue; + } + + found = 1; + list_del_init(&lkb->lkb_ownqueue); + lkb->lkb_flags &= ~DLM_IFL_ORPHAN; + *lkid = lkb->lkb_id; + break; + } + mutex_unlock(&ls->ls_orphans_mutex); + + if (!found && found_other_mode) { + rv = -EAGAIN; + goto out; + } + + if (!found) { + rv = -ENOENT; + goto out; + } + + lkb->lkb_exflags = flags; + lkb->lkb_ownpid = (int) current->pid; + + ua = lkb->lkb_ua; + + ua->proc = ua_tmp->proc; + ua->xid = ua_tmp->xid; + ua->castparam = ua_tmp->castparam; + ua->castaddr = ua_tmp->castaddr; + ua->bastparam = ua_tmp->bastparam; + ua->bastaddr = ua_tmp->bastaddr; + ua->user_lksb = ua_tmp->user_lksb; + + /* + * The lkb reference from the ls_orphans list was not + * removed above, and is now considered the reference + * for the proc locks list. + */ + + spin_lock(&ua->proc->locks_spin); + list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks); + spin_unlock(&ua->proc->locks_spin); + out: + kfree(ua_tmp); + return rv; +} + int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, uint32_t flags, uint32_t lkid, char *lvb_in) { @@ -6029,7 +6101,7 @@ static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) struct dlm_args args; int error; - hold_lkb(lkb); + hold_lkb(lkb); /* reference for the ls_orphans list */ mutex_lock(&ls->ls_orphans_mutex); list_add_tail(&lkb->lkb_ownqueue, &ls->ls_orphans); mutex_unlock(&ls->ls_orphans_mutex); @@ -6217,7 +6289,7 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc, { int error = 0; - if (nodeid != dlm_our_nodeid()) { + if (nodeid && (nodeid != dlm_our_nodeid())) { error = send_purge(ls, nodeid, pid); } else { dlm_lock_recovery(ls); diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 5e0c72e36a9..ed8ebd3a859 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -49,6 +49,9 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode, int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, int mode, uint32_t flags, uint32_t lkid, char *lvb_in, unsigned long timeout_cs); +int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, + int mode, uint32_t flags, void *name, unsigned int namelen, + unsigned long timeout_cs, uint32_t *lkid); int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, uint32_t flags, uint32_t lkid, char *lvb_in); int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 142e21655ee..fb85f32e9ec 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -238,6 +238,7 @@ static int device_user_lock(struct dlm_user_proc *proc, { struct dlm_ls *ls; struct dlm_user_args *ua; + uint32_t lkid; int error = -ENOMEM; ls = dlm_find_lockspace_local(proc->lockspace); @@ -260,12 +261,20 @@ static int device_user_lock(struct dlm_user_proc *proc, ua->bastaddr = params->bastaddr; ua->xid = params->xid; - if (params->flags & DLM_LKF_CONVERT) + if (params->flags & DLM_LKF_CONVERT) { error = dlm_user_convert(ls, ua, params->mode, params->flags, params->lkid, params->lvb, (unsigned long) params->timeout); - else { + } else if (params->flags & DLM_LKF_ORPHAN) { + error = dlm_user_adopt_orphan(ls, ua, + params->mode, params->flags, + params->name, params->namelen, + (unsigned long) params->timeout, + &lkid); + if (!error) + error = lkid; + } else { error = dlm_user_request(ls, ua, params->mode, params->flags, params->name, params->namelen, diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 2f6735dbf1a..c2d6604667b 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1373,7 +1373,7 @@ out: int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode) { struct dentry *lower_dentry = - ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry; + ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_path.dentry; ssize_t size; int rc = 0; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index f5bce909655..80154ec4f8c 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -75,11 +75,11 @@ struct ecryptfs_getdents_callback { /* Inspired by generic filldir in fs/readdir.c */ static int -ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen, - loff_t offset, u64 ino, unsigned int d_type) +ecryptfs_filldir(struct dir_context *ctx, const char *lower_name, + int lower_namelen, loff_t offset, u64 ino, unsigned int d_type) { struct ecryptfs_getdents_callback *buf = - (struct ecryptfs_getdents_callback *)dirent; + container_of(ctx, struct ecryptfs_getdents_callback, ctx); size_t name_size; char *name; int rc; diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 564a1fa34b9..4626976794e 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -419,7 +419,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode) ssize_t size; void *xattr_virt; struct dentry *lower_dentry = - ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry; + ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_path.dentry; struct inode *lower_inode = lower_dentry->d_inode; int rc; diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index cdb2971192a..90001da9abf 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -47,8 +47,8 @@ static ssize_t efivarfs_file_write(struct file *file, if (bytes == -ENOENT) { drop_nlink(inode); - d_delete(file->f_dentry); - dput(file->f_dentry); + d_delete(file->f_path.dentry); + dput(file->f_path.dentry); } else { mutex_lock(&inode->i_mutex); i_size_write(inode, datasize + sizeof(attributes)); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 0a48886e069..6dad1176ec5 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -236,6 +236,7 @@ static void efivarfs_kill_sb(struct super_block *sb) } static struct file_system_type efivarfs_type = { + .owner = THIS_MODULE, .name = "efivarfs", .mount = efivarfs_mount, .kill_sb = efivarfs_kill_sb, @@ -244,17 +245,23 @@ static struct file_system_type efivarfs_type = { static __init int efivarfs_init(void) { if (!efi_enabled(EFI_RUNTIME_SERVICES)) - return 0; + return -ENODEV; if (!efivars_kobject()) - return 0; + return -ENODEV; return register_filesystem(&efivarfs_type); } +static __exit void efivarfs_exit(void) +{ + unregister_filesystem(&efivarfs_type); +} + MODULE_AUTHOR("Matthew Garrett, Jeremy Kerr"); MODULE_DESCRIPTION("EFI Variable Filesystem"); MODULE_LICENSE("GPL"); MODULE_ALIAS_FS("efivarfs"); module_init(efivarfs_init); +module_exit(efivarfs_exit); diff --git a/fs/eventfd.c b/fs/eventfd.c index d6a88e7812f..4b0a226024f 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -287,17 +287,14 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c } #ifdef CONFIG_PROC_FS -static int eventfd_show_fdinfo(struct seq_file *m, struct file *f) +static void eventfd_show_fdinfo(struct seq_file *m, struct file *f) { struct eventfd_ctx *ctx = f->private_data; - int ret; spin_lock_irq(&ctx->wqh.lock); - ret = seq_printf(m, "eventfd-count: %16llx\n", - (unsigned long long)ctx->count); + seq_printf(m, "eventfd-count: %16llx\n", + (unsigned long long)ctx->count); spin_unlock_irq(&ctx->wqh.lock); - - return ret; } #endif diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 7bcfff900f0..d77f9449135 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -870,25 +870,22 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) } #ifdef CONFIG_PROC_FS -static int ep_show_fdinfo(struct seq_file *m, struct file *f) +static void ep_show_fdinfo(struct seq_file *m, struct file *f) { struct eventpoll *ep = f->private_data; struct rb_node *rbp; - int ret = 0; mutex_lock(&ep->mtx); for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { struct epitem *epi = rb_entry(rbp, struct epitem, rbn); - ret = seq_printf(m, "tfd: %8d events: %8x data: %16llx\n", - epi->ffd.fd, epi->event.events, - (long long)epi->event.data); - if (ret) + seq_printf(m, "tfd: %8d events: %8x data: %16llx\n", + epi->ffd.fd, epi->event.events, + (long long)epi->event.data); + if (seq_has_overflowed(m)) break; } mutex_unlock(&ep->mtx); - - return ret; } #endif diff --git a/fs/exec.c b/fs/exec.c index 7302b75a982..01aebe30020 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -277,6 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) goto err; mm->stack_vm = mm->total_vm = 1; + arch_bprm_mm_init(mm, vma); up_write(&mm->mmap_sem); bprm->p = vma->vm_end - sizeof(void *); return 0; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index b01fbfb51f4..fdfd206c737 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -50,7 +50,7 @@ find_acceptable_alias(struct dentry *result, inode = result->d_inode; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { dget(dentry); spin_unlock(&inode->i_lock); if (toput) @@ -241,10 +241,11 @@ struct getdents_callback { * A rather strange filldir function to capture * the name matching the specified inode number. */ -static int filldir_one(void * __buf, const char * name, int len, +static int filldir_one(struct dir_context *ctx, const char *name, int len, loff_t pos, u64 ino, unsigned int d_type) { - struct getdents_callback *buf = __buf; + struct getdents_callback *buf = + container_of(ctx, struct getdents_callback, ctx); int result = 0; buf->sequence++; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index d9a17d0b124..e4279ead4a0 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -689,6 +689,9 @@ struct ext2_inode_info { struct mutex truncate_mutex; struct inode vfs_inode; struct list_head i_orphan; /* unlinked but open inodes */ +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; +#endif }; /* diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 170dc41e8bf..ae55fddc26a 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -166,6 +166,10 @@ static struct inode *ext2_alloc_inode(struct super_block *sb) return NULL; ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; +#ifdef CONFIG_QUOTA + memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); +#endif + return &ei->vfs_inode; } @@ -303,6 +307,10 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) #ifdef CONFIG_QUOTA static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); +static struct dquot **ext2_get_dquots(struct inode *inode) +{ + return EXT2_I(inode)->i_dquot; +} #endif static const struct super_operations ext2_sops = { @@ -320,6 +328,7 @@ static const struct super_operations ext2_sops = { #ifdef CONFIG_QUOTA .quota_read = ext2_quota_read, .quota_write = ext2_quota_write, + .get_dquots = ext2_get_dquots, #endif }; @@ -1090,6 +1099,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_QUOTA sb->dq_op = &dquot_operations; sb->s_qcop = &dquot_quotactl_ops; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif root = ext2_iget(sb, EXT2_ROOT_INO); diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h index fc3cdcf24ae..f483a80b3fe 100644 --- a/fs/ext3/ext3.h +++ b/fs/ext3/ext3.h @@ -615,6 +615,10 @@ struct ext3_inode_info { atomic_t i_sync_tid; atomic_t i_datasync_tid; +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; +#endif + struct inode vfs_inode; }; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index eb742d0e67f..9b4e7d750d4 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -485,6 +485,10 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) ei->vfs_inode.i_version = 1; atomic_set(&ei->i_datasync_tid, 0); atomic_set(&ei->i_sync_tid, 0); +#ifdef CONFIG_QUOTA + memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); +#endif + return &ei->vfs_inode; } @@ -764,6 +768,10 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); static ssize_t ext3_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); +static struct dquot **ext3_get_dquots(struct inode *inode) +{ + return EXT3_I(inode)->i_dquot; +} static const struct dquot_operations ext3_quota_operations = { .write_dquot = ext3_write_dquot, @@ -803,6 +811,7 @@ static const struct super_operations ext3_sops = { #ifdef CONFIG_QUOTA .quota_read = ext3_quota_read, .quota_write = ext3_quota_write, + .get_dquots = ext3_get_dquots, #endif .bdev_try_to_free_page = bdev_try_to_free_page, }; @@ -2001,6 +2010,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) #ifdef CONFIG_QUOTA sb->s_qcop = &ext3_qctl_operations; sb->dq_op = &ext3_quota_operations; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c55a1faaed5..db3f772e57a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -941,6 +941,10 @@ struct ext4_inode_info { tid_t i_sync_tid; tid_t i_datasync_tid; +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; +#endif + /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ __u32 i_csum_seed; }; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2c9e6864abd..63e802b8ec6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -892,6 +892,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) spin_lock_init(&(ei->i_block_reservation_lock)); #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; + memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); #endif ei->jinode = NULL; INIT_LIST_HEAD(&ei->i_rsv_conversion_list); @@ -1068,6 +1069,11 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, unsigned int flags); static int ext4_enable_quotas(struct super_block *sb); +static struct dquot **ext4_get_dquots(struct inode *inode) +{ + return EXT4_I(inode)->i_dquot; +} + static const struct dquot_operations ext4_quota_operations = { .get_reserved_space = ext4_get_reserved_space, .write_dquot = ext4_write_dquot, @@ -1117,6 +1123,7 @@ static const struct super_operations ext4_sops = { #ifdef CONFIG_QUOTA .quota_read = ext4_quota_read, .quota_write = ext4_quota_write, + .get_dquots = ext4_get_dquots, #endif .bdev_try_to_free_page = bdev_try_to_free_page, }; @@ -3932,6 +3939,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_qcop = &ext4_qctl_sysfile_operations; else sb->s_qcop = &ext4_qctl_operations; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 83b9b5a8d11..1ccb26bc2a0 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -162,7 +162,8 @@ fail: return ERR_PTR(-EINVAL); } -struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, + struct page *dpage) { int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; void *value = NULL; @@ -172,12 +173,13 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) if (type == ACL_TYPE_ACCESS) name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - retval = f2fs_getxattr(inode, name_index, "", NULL, 0); + retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage); if (retval > 0) { value = kmalloc(retval, GFP_F2FS_ZERO); if (!value) return ERR_PTR(-ENOMEM); - retval = f2fs_getxattr(inode, name_index, "", value, retval); + retval = f2fs_getxattr(inode, name_index, "", value, + retval, dpage); } if (retval > 0) @@ -194,6 +196,11 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) return acl; } +struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +{ + return __f2fs_get_acl(inode, type, NULL); +} + static int __f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { @@ -229,7 +236,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (acl) { value = f2fs_acl_to_disk(acl, &size); if (IS_ERR(value)) { - cond_clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(fi, FI_ACL_MODE); return (int)PTR_ERR(value); } } @@ -240,7 +247,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (!error) set_cached_acl(inode, type, acl); - cond_clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(fi, FI_ACL_MODE); return error; } @@ -249,12 +256,137 @@ int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type) return __f2fs_set_acl(inode, type, acl, NULL); } -int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage) +/* + * Most part of f2fs_acl_clone, f2fs_acl_create_masq, f2fs_acl_create + * are copied from posix_acl.c + */ +static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl, + gfp_t flags) +{ + struct posix_acl *clone = NULL; + + if (acl) { + int size = sizeof(struct posix_acl) + acl->a_count * + sizeof(struct posix_acl_entry); + clone = kmemdup(acl, size, flags); + if (clone) + atomic_set(&clone->a_refcount, 1); + } + return clone; +} + +static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) +{ + struct posix_acl_entry *pa, *pe; + struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; + umode_t mode = *mode_p; + int not_equiv = 0; + + /* assert(atomic_read(acl->a_refcount) == 1); */ + + FOREACH_ACL_ENTRY(pa, acl, pe) { + switch(pa->e_tag) { + case ACL_USER_OBJ: + pa->e_perm &= (mode >> 6) | ~S_IRWXO; + mode &= (pa->e_perm << 6) | ~S_IRWXU; + break; + + case ACL_USER: + case ACL_GROUP: + not_equiv = 1; + break; + + case ACL_GROUP_OBJ: + group_obj = pa; + break; + + case ACL_OTHER: + pa->e_perm &= mode | ~S_IRWXO; + mode &= pa->e_perm | ~S_IRWXO; + break; + + case ACL_MASK: + mask_obj = pa; + not_equiv = 1; + break; + + default: + return -EIO; + } + } + + if (mask_obj) { + mask_obj->e_perm &= (mode >> 3) | ~S_IRWXO; + mode &= (mask_obj->e_perm << 3) | ~S_IRWXG; + } else { + if (!group_obj) + return -EIO; + group_obj->e_perm &= (mode >> 3) | ~S_IRWXO; + mode &= (group_obj->e_perm << 3) | ~S_IRWXG; + } + + *mode_p = (*mode_p & ~S_IRWXUGO) | mode; + return not_equiv; +} + +static int f2fs_acl_create(struct inode *dir, umode_t *mode, + struct posix_acl **default_acl, struct posix_acl **acl, + struct page *dpage) +{ + struct posix_acl *p; + int ret; + + if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) + goto no_acl; + + p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage); + if (IS_ERR(p)) { + if (p == ERR_PTR(-EOPNOTSUPP)) + goto apply_umask; + return PTR_ERR(p); + } + + if (!p) + goto apply_umask; + + *acl = f2fs_acl_clone(p, GFP_NOFS); + if (!*acl) + return -ENOMEM; + + ret = f2fs_acl_create_masq(*acl, mode); + if (ret < 0) { + posix_acl_release(*acl); + return -ENOMEM; + } + + if (ret == 0) { + posix_acl_release(*acl); + *acl = NULL; + } + + if (!S_ISDIR(*mode)) { + posix_acl_release(p); + *default_acl = NULL; + } else { + *default_acl = p; + } + return 0; + +apply_umask: + *mode &= ~current_umask(); +no_acl: + *default_acl = NULL; + *acl = NULL; + return 0; +} + +int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, + struct page *dpage) { - struct posix_acl *default_acl, *acl; + struct posix_acl *default_acl = NULL, *acl = NULL; int error = 0; - error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); + error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage); if (error) return error; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index e0864651cdc..997ca8edb6c 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -38,14 +38,15 @@ struct f2fs_acl_header { extern struct posix_acl *f2fs_get_acl(struct inode *, int); extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type); -extern int f2fs_init_acl(struct inode *, struct inode *, struct page *); +extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, + struct page *); #else #define f2fs_check_acl NULL #define f2fs_get_acl NULL #define f2fs_set_acl NULL static inline int f2fs_init_acl(struct inode *inode, struct inode *dir, - struct page *page) + struct page *ipage, struct page *dpage) { return 0; } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index dd10a031c05..e6c271fefac 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -72,36 +72,36 @@ out: return page; } -struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index) -{ - bool readahead = false; - struct page *page; - - page = find_get_page(META_MAPPING(sbi), index); - if (!page || (page && !PageUptodate(page))) - readahead = true; - f2fs_put_page(page, 0); - - if (readahead) - ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); - return get_meta_page(sbi, index); -} - -static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type) +static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) { switch (type) { case META_NAT: - return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK; + break; case META_SIT: - return SIT_BLK_CNT(sbi); + if (unlikely(blkaddr >= SIT_BLK_CNT(sbi))) + return false; + break; case META_SSA: + if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) || + blkaddr < SM_I(sbi)->ssa_blkaddr)) + return false; + break; case META_CP: - return 0; + if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr || + blkaddr < __start_cp_addr(sbi))) + return false; + break; case META_POR: - return MAX_BLKADDR(sbi); + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || + blkaddr < MAIN_BLKADDR(sbi))) + return false; + break; default: BUG(); } + + return true; } /* @@ -112,7 +112,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type block_t prev_blk_addr = 0; struct page *page; block_t blkno = start; - block_t max_blks = get_max_meta_blks(sbi, type); struct f2fs_io_info fio = { .type = META, @@ -122,18 +121,20 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type for (; nrpages-- > 0; blkno++) { block_t blk_addr; + if (!is_valid_blkaddr(sbi, blkno, type)) + goto out; + switch (type) { case META_NAT: - /* get nat block addr */ - if (unlikely(blkno >= max_blks)) + if (unlikely(blkno >= + NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid))) blkno = 0; + /* get nat block addr */ blk_addr = current_nat_addr(sbi, blkno * NAT_ENTRY_PER_BLOCK); break; case META_SIT: /* get sit block addr */ - if (unlikely(blkno >= max_blks)) - goto out; blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); if (blkno != start && prev_blk_addr + 1 != blk_addr) @@ -143,10 +144,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type case META_SSA: case META_CP: case META_POR: - if (unlikely(blkno >= max_blks)) - goto out; - if (unlikely(blkno < SEG0_BLKADDR(sbi))) - goto out; blk_addr = blkno; break; default: @@ -169,6 +166,20 @@ out: return blkno - start; } +void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct page *page; + bool readahead = false; + + page = find_get_page(META_MAPPING(sbi), index); + if (!page || (page && !PageUptodate(page))) + readahead = true; + f2fs_put_page(page, 0); + + if (readahead) + ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); +} + static int f2fs_write_meta_page(struct page *page, struct writeback_control *wbc) { @@ -178,7 +189,7 @@ static int f2fs_write_meta_page(struct page *page, if (unlikely(sbi->por_doing)) goto redirty_out; - if (wbc->for_reclaim) + if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; @@ -187,6 +198,9 @@ static int f2fs_write_meta_page(struct page *page, write_meta_page(sbi, page); dec_page_count(sbi, F2FS_DIRTY_META); unlock_page(page); + + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, META, WRITE); return 0; redirty_out: @@ -298,46 +312,57 @@ const struct address_space_operations f2fs_meta_aops = { static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { + struct inode_management *im = &sbi->im[type]; struct ino_entry *e; retry: - spin_lock(&sbi->ino_lock[type]); + if (radix_tree_preload(GFP_NOFS)) { + cond_resched(); + goto retry; + } + + spin_lock(&im->ino_lock); - e = radix_tree_lookup(&sbi->ino_root[type], ino); + e = radix_tree_lookup(&im->ino_root, ino); if (!e) { e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); if (!e) { - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); + radix_tree_preload_end(); goto retry; } - if (radix_tree_insert(&sbi->ino_root[type], ino, e)) { - spin_unlock(&sbi->ino_lock[type]); + if (radix_tree_insert(&im->ino_root, ino, e)) { + spin_unlock(&im->ino_lock); kmem_cache_free(ino_entry_slab, e); + radix_tree_preload_end(); goto retry; } memset(e, 0, sizeof(struct ino_entry)); e->ino = ino; - list_add_tail(&e->list, &sbi->ino_list[type]); + list_add_tail(&e->list, &im->ino_list); + if (type != ORPHAN_INO) + im->ino_num++; } - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); + radix_tree_preload_end(); } static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { + struct inode_management *im = &sbi->im[type]; struct ino_entry *e; - spin_lock(&sbi->ino_lock[type]); - e = radix_tree_lookup(&sbi->ino_root[type], ino); + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); if (e) { list_del(&e->list); - radix_tree_delete(&sbi->ino_root[type], ino); - if (type == ORPHAN_INO) - sbi->n_orphans--; - spin_unlock(&sbi->ino_lock[type]); + radix_tree_delete(&im->ino_root, ino); + im->ino_num--; + spin_unlock(&im->ino_lock); kmem_cache_free(ino_entry_slab, e); return; } - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); } void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) @@ -355,10 +380,12 @@ void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) /* mode should be APPEND_INO or UPDATE_INO */ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) { + struct inode_management *im = &sbi->im[mode]; struct ino_entry *e; - spin_lock(&sbi->ino_lock[mode]); - e = radix_tree_lookup(&sbi->ino_root[mode], ino); - spin_unlock(&sbi->ino_lock[mode]); + + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); + spin_unlock(&im->ino_lock); return e ? true : false; } @@ -368,36 +395,42 @@ void release_dirty_inode(struct f2fs_sb_info *sbi) int i; for (i = APPEND_INO; i <= UPDATE_INO; i++) { - spin_lock(&sbi->ino_lock[i]); - list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) { + struct inode_management *im = &sbi->im[i]; + + spin_lock(&im->ino_lock); + list_for_each_entry_safe(e, tmp, &im->ino_list, list) { list_del(&e->list); - radix_tree_delete(&sbi->ino_root[i], e->ino); + radix_tree_delete(&im->ino_root, e->ino); kmem_cache_free(ino_entry_slab, e); + im->ino_num--; } - spin_unlock(&sbi->ino_lock[i]); + spin_unlock(&im->ino_lock); } } int acquire_orphan_inode(struct f2fs_sb_info *sbi) { + struct inode_management *im = &sbi->im[ORPHAN_INO]; int err = 0; - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - if (unlikely(sbi->n_orphans >= sbi->max_orphans)) + spin_lock(&im->ino_lock); + if (unlikely(im->ino_num >= sbi->max_orphans)) err = -ENOSPC; else - sbi->n_orphans++; - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + im->ino_num++; + spin_unlock(&im->ino_lock); return err; } void release_orphan_inode(struct f2fs_sb_info *sbi) { - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - f2fs_bug_on(sbi, sbi->n_orphans == 0); - sbi->n_orphans--; - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + struct inode_management *im = &sbi->im[ORPHAN_INO]; + + spin_lock(&im->ino_lock); + f2fs_bug_on(sbi, im->ino_num == 0); + im->ino_num--; + spin_unlock(&im->ino_lock); } void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -460,17 +493,19 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) struct f2fs_orphan_block *orphan_blk = NULL; unsigned int nentries = 0; unsigned short index; - unsigned short orphan_blocks = - (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans); + unsigned short orphan_blocks; struct page *page = NULL; struct ino_entry *orphan = NULL; + struct inode_management *im = &sbi->im[ORPHAN_INO]; + + orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num); for (index = 0; index < orphan_blocks; index++) grab_meta_page(sbi, start_blk + index); index = 1; - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - head = &sbi->ino_list[ORPHAN_INO]; + spin_lock(&im->ino_lock); + head = &im->ino_list; /* loop for each orphan inode entry and write them in Jornal block */ list_for_each_entry(orphan, head, list) { @@ -510,7 +545,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) f2fs_put_page(page, 1); } - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + spin_unlock(&im->ino_lock); } static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, @@ -731,6 +766,9 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) struct dir_inode_entry *entry; struct inode *inode; retry: + if (unlikely(f2fs_cp_error(sbi))) + return; + spin_lock(&sbi->dir_inode_lock); head = &sbi->dir_inode_list; @@ -830,6 +868,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; nid_t last_nid = nm_i->next_scan_nid; block_t start_blk; struct page *cp_page; @@ -889,7 +928,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) else clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans); + orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); @@ -905,7 +944,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) orphan_blocks); } - if (sbi->n_orphans) + if (orphan_num) set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); @@ -940,7 +979,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_put_page(cp_page, 1); } - if (sbi->n_orphans) { + if (orphan_num) { write_orphan_inodes(sbi, start_blk); start_blk += orphan_blocks; } @@ -975,6 +1014,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Here, we only have one bio having CP pack */ sync_meta_pages(sbi, META_FLUSH, LONG_MAX); + /* wait for previous submitted meta pages writeback */ + wait_on_all_pages_writeback(sbi); + release_dirty_inode(sbi); if (unlikely(f2fs_cp_error(sbi))) @@ -1036,9 +1078,12 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) int i; for (i = 0; i < MAX_INO_ENTRY; i++) { - INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC); - spin_lock_init(&sbi->ino_lock[i]); - INIT_LIST_HEAD(&sbi->ino_list[i]); + struct inode_management *im = &sbi->im[i]; + + INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC); + spin_lock_init(&im->ino_lock); + INIT_LIST_HEAD(&im->ino_list); + im->ino_num = 0; } /* @@ -1047,7 +1092,6 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) * orphan entries with the limitation one reserved segment * for cp pack we can have max 1020*504 orphan entries */ - sbi->n_orphans = 0; sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8e58c4cc2cb..7ec697b37f1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -61,11 +61,6 @@ static void f2fs_write_end_io(struct bio *bio, int err) dec_page_count(sbi, F2FS_WRITEBACK); } - if (sbi->wait_io) { - complete(sbi->wait_io); - sbi->wait_io = NULL; - } - if (!get_pages(sbi, F2FS_WRITEBACK) && !list_empty(&sbi->cp_wait.task_list)) wake_up(&sbi->cp_wait); @@ -95,34 +90,18 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; - int rw; if (!io->bio) return; - rw = fio->rw; - - if (is_read_io(rw)) { - trace_f2fs_submit_read_bio(io->sbi->sb, rw, - fio->type, io->bio); - submit_bio(rw, io->bio); - } else { - trace_f2fs_submit_write_bio(io->sbi->sb, rw, - fio->type, io->bio); - /* - * META_FLUSH is only from the checkpoint procedure, and we - * should wait this metadata bio for FS consistency. - */ - if (fio->type == META_FLUSH) { - DECLARE_COMPLETION_ONSTACK(wait); - io->sbi->wait_io = &wait; - submit_bio(rw, io->bio); - wait_for_completion(&wait); - } else { - submit_bio(rw, io->bio); - } - } + if (is_read_io(fio->rw)) + trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw, + fio->type, io->bio); + else + trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw, + fio->type, io->bio); + submit_bio(fio->rw, io->bio); io->bio = NULL; } @@ -257,9 +236,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) bool need_put = dn->inode_page ? false : true; int err; - /* if inode_page exists, index should be zero */ - f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index); - err = get_dnode_of_data(dn, index, ALLOC_NODE); if (err) return err; @@ -740,14 +716,14 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, static int f2fs_read_data_page(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; - int ret; + int ret = -EAGAIN; trace_f2fs_readpage(page, DATA); /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); - else + if (ret == -EAGAIN) ret = mpage_readpage(page, get_data_block); return ret; @@ -859,10 +835,11 @@ write: else if (has_not_enough_free_secs(sbi, 0)) goto redirty_out; + err = -EAGAIN; f2fs_lock_op(sbi); - if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) - err = f2fs_write_inline_data(inode, page, offset); - else + if (f2fs_has_inline_data(inode)) + err = f2fs_write_inline_data(inode, page); + if (err == -EAGAIN) err = do_write_data_page(page, &fio); f2fs_unlock_op(sbi); done: @@ -951,7 +928,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *page; + struct page *page, *ipage; pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; struct dnode_of_data dn; int err = 0; @@ -959,45 +936,60 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); f2fs_balance_fs(sbi); -repeat: - err = f2fs_convert_inline_data(inode, pos + len, NULL); - if (err) - goto fail; + /* + * We should check this at this moment to avoid deadlock on inode page + * and #0 page. The locking rule for inline_data conversion should be: + * lock_page(page #0) -> lock_page(inode_page) + */ + if (index != 0) { + err = f2fs_convert_inline_inode(inode); + if (err) + goto fail; + } +repeat: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { err = -ENOMEM; goto fail; } - /* to avoid latency during memory pressure */ - unlock_page(page); - *pagep = page; - if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA) - goto inline_data; - f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_reserve_block(&dn, index); - f2fs_unlock_op(sbi); - if (err) { - f2fs_put_page(page, 0); - goto fail; - } -inline_data: - lock_page(page); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); - goto repeat; + + /* check inline_data */ + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto unlock_fail; } - f2fs_wait_on_page_writeback(page, DATA); + set_new_dnode(&dn, inode, ipage, ipage, 0); + + if (f2fs_has_inline_data(inode)) { + if (pos + len <= MAX_INLINE_DATA) { + read_inline_data(page, ipage); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); + goto put_next; + } + err = f2fs_convert_inline_page(&dn, page); + if (err) + goto put_fail; + } + err = f2fs_reserve_block(&dn, index); + if (err) + goto put_fail; +put_next: + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; + f2fs_wait_on_page_writeback(page, DATA); + if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { unsigned start = pos & (PAGE_CACHE_SIZE - 1); unsigned end = start + len; @@ -1010,18 +1002,10 @@ inline_data: if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { - if (f2fs_has_inline_data(inode)) { - err = f2fs_read_inline_data(inode, page); - if (err) { - page_cache_release(page); - goto fail; - } - } else { - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, - READ_SYNC); - if (err) - goto fail; - } + err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, + READ_SYNC); + if (err) + goto fail; lock_page(page); if (unlikely(!PageUptodate(page))) { @@ -1038,6 +1022,12 @@ out: SetPageUptodate(page); clear_cold_data(page); return 0; + +put_fail: + f2fs_put_dnode(&dn); +unlock_fail: + f2fs_unlock_op(sbi); + f2fs_put_page(page, 1); fail: f2fs_write_failed(mapping, pos + len); return err; @@ -1052,10 +1042,7 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) - register_inmem_page(inode, page); - else - set_page_dirty(page); + set_page_dirty(page); if (pos + copied > i_size_read(inode)) { i_size_write(inode, pos + copied); @@ -1093,9 +1080,12 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); int err; - /* Let buffer I/O handle the inline data case. */ - if (f2fs_has_inline_data(inode)) - return 0; + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } if (check_direct_IO(inode, rw, iter, offset)) return 0; @@ -1119,6 +1109,9 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) return; + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + invalidate_inmem_page(inode, page); + if (PageDirty(page)) inode_dec_dirty_pages(inode); ClearPagePrivate(page); @@ -1138,6 +1131,12 @@ static int f2fs_set_data_page_dirty(struct page *page) trace_f2fs_set_page_dirty(page, DATA); SetPageUptodate(page); + + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) { + register_inmem_page(inode, page); + return 1; + } + mark_inode_dirty(inode); if (!PageDirty(page)) { @@ -1152,9 +1151,12 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; - if (f2fs_has_inline_data(inode)) - return 0; - + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + int err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } return generic_block_bmap(mapping, block, get_data_block); } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0a91ab813a9..91e8f699ab3 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -39,13 +39,15 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_dirs = sbi->n_dirty_dirs; si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); + si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); si->valid_count = valid_user_blocks(sbi); si->valid_node_count = valid_node_count(sbi); si->valid_inode_count = valid_inode_count(sbi); - si->inline_inode = sbi->inline_inode; + si->inline_inode = atomic_read(&sbi->inline_inode); + si->inline_dir = atomic_read(&sbi->inline_dir); si->utilization = utilization(sbi); si->free_segs = free_segments(sbi); @@ -118,6 +120,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); unsigned npages; + int i; if (si->base_mem) goto get_cache; @@ -167,8 +170,9 @@ get_cache: si->cache_mem += npages << PAGE_CACHE_SHIFT; npages = META_MAPPING(sbi)->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; - si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry); si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); + for (i = 0; i <= UPDATE_INO; i++) + si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); } static int stat_show(struct seq_file *s, void *v) @@ -200,6 +204,8 @@ static int stat_show(struct seq_file *s, void *v) si->valid_count - si->valid_node_count); seq_printf(s, " - Inline_data Inode: %u\n", si->inline_inode); + seq_printf(s, " - Inline_dentry Inode: %u\n", + si->inline_dir); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); @@ -244,6 +250,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); seq_puts(s, "\nBalancing F2FS Async:\n"); + seq_printf(s, " - inmem: %4d\n", + si->inmem_pages); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d\n", @@ -321,6 +329,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) si->sbi = sbi; sbi->stat_info = si; + atomic_set(&sbi->inline_inode, 0); + atomic_set(&sbi->inline_dir, 0); + mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); mutex_unlock(&f2fs_stat_mutex); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b54f87149c0..b1a7d5737cd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -37,7 +37,7 @@ static unsigned int bucket_blocks(unsigned int level) return 4; } -static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { +unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { [F2FS_FT_UNKNOWN] = DT_UNKNOWN, [F2FS_FT_REG_FILE] = DT_REG, [F2FS_FT_DIR] = DT_DIR, @@ -59,7 +59,7 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, }; -static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) +void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) { umode_t mode = inode->i_mode; de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; @@ -90,51 +90,70 @@ static bool early_match_name(size_t namelen, f2fs_hash_t namehash, } static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, - struct qstr *name, int *max_slots, - f2fs_hash_t namehash, struct page **res_page) + struct qstr *name, int *max_slots, + struct page **res_page) +{ + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; + + dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); + + make_dentry_ptr(&d, (void *)dentry_blk, 1); + de = find_target_dentry(name, max_slots, &d); + + if (de) + *res_page = dentry_page; + else + kunmap(dentry_page); + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs has occurred. + */ + f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0); + return de; +} + +struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, + struct f2fs_dentry_ptr *d) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; - struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); - const void *dentry_bits = &dentry_blk->dentry_bitmap; + f2fs_hash_t namehash = f2fs_dentry_hash(name); int max_len = 0; - while (bit_pos < NR_DENTRY_IN_BLOCK) { - if (!test_bit_le(bit_pos, dentry_bits)) { + if (max_slots) + *max_slots = 0; + while (bit_pos < d->max) { + if (!test_bit_le(bit_pos, d->bitmap)) { if (bit_pos == 0) max_len = 1; - else if (!test_bit_le(bit_pos - 1, dentry_bits)) + else if (!test_bit_le(bit_pos - 1, d->bitmap)) max_len++; bit_pos++; continue; } - de = &dentry_blk->dentry[bit_pos]; - if (early_match_name(name->len, namehash, de)) { - if (!memcmp(dentry_blk->filename[bit_pos], - name->name, - name->len)) { - *res_page = dentry_page; - goto found; - } - } - if (max_len > *max_slots) { + de = &d->dentry[bit_pos]; + if (early_match_name(name->len, namehash, de) && + !memcmp(d->filename[bit_pos], name->name, name->len)) + goto found; + + if (max_slots && *max_slots >= 0 && max_len > *max_slots) { *max_slots = max_len; max_len = 0; } - /* - * For the most part, it should be a bug when name_len is zero. - * We stop here for figuring out where the bugs has occurred. - */ - f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len); + /* remain bug on condition */ + if (unlikely(!de->name_len)) + d->max = -1; bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); } de = NULL; - kunmap(dentry_page); found: - if (max_len > *max_slots) + if (max_slots && max_len > *max_slots) *max_slots = max_len; return de; } @@ -149,7 +168,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, struct page *dentry_page; struct f2fs_dir_entry *de = NULL; bool room = false; - int max_slots = 0; + int max_slots; f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); @@ -168,8 +187,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, continue; } - de = find_in_block(dentry_page, name, &max_slots, - namehash, res_page); + de = find_in_block(dentry_page, name, &max_slots, res_page); if (de) break; @@ -201,6 +219,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; + if (f2fs_has_inline_dentry(dir)) + return find_in_inline_dir(dir, child, res_page); + if (npages == 0) return NULL; @@ -227,6 +248,9 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) struct f2fs_dir_entry *de; struct f2fs_dentry_block *dentry_blk; + if (f2fs_has_inline_dentry(dir)) + return f2fs_parent_inline_dir(dir, p); + page = get_lock_data_page(dir, 0); if (IS_ERR(page)) return NULL; @@ -247,7 +271,7 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) de = f2fs_find_entry(dir, qstr, &page); if (de) { res = le32_to_cpu(de->ino); - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); } @@ -257,11 +281,13 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode) { + enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA; lock_page(page); - f2fs_wait_on_page_writeback(page, DATA); + f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); - kunmap(page); + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); @@ -296,36 +322,48 @@ int update_dent_inode(struct inode *inode, const struct qstr *name) return 0; } -static int make_empty_dir(struct inode *inode, - struct inode *parent, struct page *page) +void do_make_empty_dir(struct inode *inode, struct inode *parent, + struct f2fs_dentry_ptr *d) { - struct page *dentry_page; - struct f2fs_dentry_block *dentry_blk; struct f2fs_dir_entry *de; - dentry_page = get_new_data_page(inode, page, 0, true); - if (IS_ERR(dentry_page)) - return PTR_ERR(dentry_page); - - - dentry_blk = kmap_atomic(dentry_page); - - de = &dentry_blk->dentry[0]; + de = &d->dentry[0]; de->name_len = cpu_to_le16(1); de->hash_code = 0; de->ino = cpu_to_le32(inode->i_ino); - memcpy(dentry_blk->filename[0], ".", 1); + memcpy(d->filename[0], ".", 1); set_de_type(de, inode); - de = &dentry_blk->dentry[1]; + de = &d->dentry[1]; de->hash_code = 0; de->name_len = cpu_to_le16(2); de->ino = cpu_to_le32(parent->i_ino); - memcpy(dentry_blk->filename[1], "..", 2); + memcpy(d->filename[1], "..", 2); set_de_type(de, inode); - test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); - test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); + test_and_set_bit_le(0, (void *)d->bitmap); + test_and_set_bit_le(1, (void *)d->bitmap); +} + +static int make_empty_dir(struct inode *inode, + struct inode *parent, struct page *page) +{ + struct page *dentry_page; + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dentry_ptr d; + + if (f2fs_has_inline_dentry(inode)) + return make_empty_inline_dir(inode, parent, page); + + dentry_page = get_new_data_page(inode, page, 0, true); + if (IS_ERR(dentry_page)) + return PTR_ERR(dentry_page); + + dentry_blk = kmap_atomic(dentry_page); + + make_dentry_ptr(&d, (void *)dentry_blk, 1); + do_make_empty_dir(inode, parent, &d); + kunmap_atomic(dentry_blk); set_page_dirty(dentry_page); @@ -333,8 +371,8 @@ static int make_empty_dir(struct inode *inode, return 0; } -static struct page *init_inode_metadata(struct inode *inode, - struct inode *dir, const struct qstr *name) +struct page *init_inode_metadata(struct inode *inode, struct inode *dir, + const struct qstr *name, struct page *dpage) { struct page *page; int err; @@ -350,7 +388,7 @@ static struct page *init_inode_metadata(struct inode *inode, goto error; } - err = f2fs_init_acl(inode, dir, page); + err = f2fs_init_acl(inode, dir, page, dpage); if (err) goto put_error; @@ -395,7 +433,7 @@ error: return ERR_PTR(err); } -static void update_parent_metadata(struct inode *dir, struct inode *inode, +void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { @@ -417,27 +455,23 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode, clear_inode_flag(F2FS_I(inode), FI_INC_LINK); } -static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots) +int room_for_filename(const void *bitmap, int slots, int max_slots) { int bit_start = 0; int zero_start, zero_end; next: - zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_start); - if (zero_start >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; + zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start); + if (zero_start >= max_slots) + return max_slots; - zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - zero_start); + zero_end = find_next_bit_le(bitmap, max_slots, zero_start); if (zero_end - zero_start >= slots) return zero_start; bit_start = zero_end + 1; - if (zero_end + 1 >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; + if (zero_end + 1 >= max_slots) + return max_slots; goto next; } @@ -463,6 +497,14 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, int err = 0; int i; + if (f2fs_has_inline_dentry(dir)) { + err = f2fs_add_inline_entry(dir, name, inode); + if (!err || err != -EAGAIN) + return err; + else + err = 0; + } + dentry_hash = f2fs_dentry_hash(name); level = 0; current_depth = F2FS_I(dir)->i_current_depth; @@ -491,7 +533,8 @@ start: return PTR_ERR(dentry_page); dentry_blk = kmap(dentry_page); - bit_pos = room_for_filename(dentry_blk, slots); + bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; @@ -506,7 +549,7 @@ add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA); down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name); + page = init_inode_metadata(inode, dir, name, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -545,7 +588,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) int err = 0; down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, NULL); + page = init_inode_metadata(inode, dir, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -560,26 +603,57 @@ fail: return err; } +void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + + down_write(&F2FS_I(inode)->i_sem); + + if (S_ISDIR(inode->i_mode)) { + drop_nlink(dir); + if (page) + update_inode(dir, page); + else + update_inode_page(dir); + } + inode->i_ctime = CURRENT_TIME; + + drop_nlink(inode); + if (S_ISDIR(inode->i_mode)) { + drop_nlink(inode); + i_size_write(inode, 0); + } + up_write(&F2FS_I(inode)->i_sem); + update_inode_page(inode); + + if (inode->i_nlink == 0) + add_orphan_inode(sbi, inode->i_ino); + else + release_orphan_inode(sbi); +} + /* * It only removes the dentry from the dentry page, corresponding name * entry in name page does not need to be touched during deletion. */ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, - struct inode *inode) + struct inode *dir, struct inode *inode) { struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; - struct inode *dir = page->mapping->host; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); int i; + if (f2fs_has_inline_dentry(dir)) + return f2fs_delete_inline_entry(dentry, page, dir, inode); + lock_page(page); f2fs_wait_on_page_writeback(page, DATA); dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; for (i = 0; i < slots; i++) - test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); /* Let's check and deallocate this dentry page */ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, @@ -590,29 +664,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, dir->i_ctime = dir->i_mtime = CURRENT_TIME; - if (inode) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - - down_write(&F2FS_I(inode)->i_sem); - - if (S_ISDIR(inode->i_mode)) { - drop_nlink(dir); - update_inode_page(dir); - } - inode->i_ctime = CURRENT_TIME; - drop_nlink(inode); - if (S_ISDIR(inode->i_mode)) { - drop_nlink(inode); - i_size_write(inode, 0); - } - up_write(&F2FS_I(inode)->i_sem); - update_inode_page(inode); - - if (inode->i_nlink == 0) - add_orphan_inode(sbi, inode->i_ino); - else - release_orphan_inode(sbi); - } + if (inode) + f2fs_drop_nlink(dir, inode, NULL); if (bit_pos == NR_DENTRY_IN_BLOCK) { truncate_hole(dir, page->index, page->index + 1); @@ -628,9 +681,12 @@ bool f2fs_empty_dir(struct inode *dir) unsigned long bidx; struct page *dentry_page; unsigned int bit_pos; - struct f2fs_dentry_block *dentry_blk; + struct f2fs_dentry_block *dentry_blk; unsigned long nblock = dir_blocks(dir); + if (f2fs_has_inline_dentry(dir)) + return f2fs_empty_inline_dir(dir); + for (bidx = 0; bidx < nblock; bidx++) { dentry_page = get_lock_data_page(dir, bidx); if (IS_ERR(dentry_page)) { @@ -640,7 +696,6 @@ bool f2fs_empty_dir(struct inode *dir) return false; } - dentry_blk = kmap_atomic(dentry_page); if (bidx == 0) bit_pos = 2; @@ -659,19 +714,48 @@ bool f2fs_empty_dir(struct inode *dir) return true; } +bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, + unsigned int start_pos) +{ + unsigned char d_type = DT_UNKNOWN; + unsigned int bit_pos; + struct f2fs_dir_entry *de = NULL; + + bit_pos = ((unsigned long)ctx->pos % d->max); + + while (bit_pos < d->max) { + bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos); + if (bit_pos >= d->max) + break; + + de = &d->dentry[bit_pos]; + if (de->file_type < F2FS_FT_MAX) + d_type = f2fs_filetype_table[de->file_type]; + else + d_type = DT_UNKNOWN; + if (!dir_emit(ctx, d->filename[bit_pos], + le16_to_cpu(de->name_len), + le32_to_cpu(de->ino), d_type)) + return true; + + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + ctx->pos = start_pos + bit_pos; + } + return false; +} + static int f2fs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); unsigned long npages = dir_blocks(inode); - unsigned int bit_pos = 0; struct f2fs_dentry_block *dentry_blk = NULL; - struct f2fs_dir_entry *de = NULL; struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); - unsigned char d_type = DT_UNKNOWN; + struct f2fs_dentry_ptr d; - bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); + if (f2fs_has_inline_dentry(inode)) + return f2fs_read_inline_dir(file, ctx); /* readahead for multi pages of dir */ if (npages - n > 1 && !ra_has_index(ra, n)) @@ -684,28 +768,12 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) continue; dentry_blk = kmap(dentry_page); - while (bit_pos < NR_DENTRY_IN_BLOCK) { - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_pos); - if (bit_pos >= NR_DENTRY_IN_BLOCK) - break; - - de = &dentry_blk->dentry[bit_pos]; - if (de->file_type < F2FS_FT_MAX) - d_type = f2fs_filetype_table[de->file_type]; - else - d_type = DT_UNKNOWN; - if (!dir_emit(ctx, - dentry_blk->filename[bit_pos], - le16_to_cpu(de->name_len), - le32_to_cpu(de->ino), d_type)) - goto stop; - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - ctx->pos = n * NR_DENTRY_IN_BLOCK + bit_pos; - } - bit_pos = 0; + make_dentry_ptr(&d, (void *)dentry_blk, 1); + + if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK)) + goto stop; + ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; kunmap(dentry_page); f2fs_put_page(dentry_page, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8171e80b2ee..ec58bb2373f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -46,8 +46,10 @@ #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 #define F2FS_MOUNT_INLINE_XATTR 0x00000080 #define F2FS_MOUNT_INLINE_DATA 0x00000100 -#define F2FS_MOUNT_FLUSH_MERGE 0x00000200 -#define F2FS_MOUNT_NOBARRIER 0x00000400 +#define F2FS_MOUNT_INLINE_DENTRY 0x00000200 +#define F2FS_MOUNT_FLUSH_MERGE 0x00000400 +#define F2FS_MOUNT_NOBARRIER 0x00000800 +#define F2FS_MOUNT_FASTBOOT 0x00001000 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) @@ -211,6 +213,32 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, /* * For INODE and NODE manager */ +/* for directory operations */ +struct f2fs_dentry_ptr { + const void *bitmap; + struct f2fs_dir_entry *dentry; + __u8 (*filename)[F2FS_SLOT_LEN]; + int max; +}; + +static inline void make_dentry_ptr(struct f2fs_dentry_ptr *d, + void *src, int type) +{ + if (type == 1) { + struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; + d->max = NR_DENTRY_IN_BLOCK; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; + } else { + struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src; + d->max = NR_INLINE_DENTRY; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; + } +} + /* * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1 * as its node offset to distinguish from index node blocks. @@ -269,6 +297,7 @@ struct f2fs_inode_info { struct extent_info ext; /* in-memory extent cache entry */ struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ + struct radix_tree_root inmem_root; /* radix tree for inmem pages */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ }; @@ -303,7 +332,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - rwlock_t nat_tree_lock; /* protect nat_tree_lock */ + struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ @@ -433,6 +462,7 @@ enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_NODES, F2FS_DIRTY_META, + F2FS_INMEM_PAGES, NR_COUNT_TYPE, }; @@ -470,6 +500,14 @@ struct f2fs_bio_info { struct rw_semaphore io_rwsem; /* blocking op for bio */ }; +/* for inner inode cache management */ +struct inode_management { + struct radix_tree_root ino_root; /* ino entry array */ + spinlock_t ino_lock; /* for ino entry lock */ + struct list_head ino_list; /* inode list head */ + unsigned long ino_num; /* number of entries */ +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -488,7 +526,6 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info read_io; /* for read bios */ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ - struct completion *wait_io; /* for completion bios */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ @@ -500,13 +537,9 @@ struct f2fs_sb_info { bool por_doing; /* recovery is doing or not */ wait_queue_head_t cp_wait; - /* for inode management */ - struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */ - spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */ - struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */ + struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ /* for orphan inode, use 0'th array */ - unsigned int n_orphans; /* # of orphan inodes */ unsigned int max_orphans; /* max orphan inodes */ /* for directory inode management */ @@ -557,7 +590,8 @@ struct f2fs_sb_info { unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ - int inline_inode; /* # of inline_data inodes */ + atomic_t inline_inode; /* # of inline_data inodes */ + atomic_t inline_dir; /* # of inline_dentry inodes */ int bg_gc; /* background gc calls */ unsigned int n_dirty_dirs; /* # of dir inodes */ #endif @@ -988,6 +1022,13 @@ retry: return entry; } +static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) +{ + while (radix_tree_insert(root, index, item)) + cond_resched(); +} + #define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) static inline bool IS_INODE(struct page *page) @@ -1020,7 +1061,7 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr) return mask & *addr; } -static inline int f2fs_set_bit(unsigned int nr, char *addr) +static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr) { int mask; int ret; @@ -1032,7 +1073,7 @@ static inline int f2fs_set_bit(unsigned int nr, char *addr) return ret; } -static inline int f2fs_clear_bit(unsigned int nr, char *addr) +static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr) { int mask; int ret; @@ -1044,6 +1085,15 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) return ret; } +static inline void f2fs_change_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + *addr ^= mask; +} + /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ @@ -1057,11 +1107,13 @@ enum { FI_NO_EXTENT, /* not to use the extent cache */ FI_INLINE_XATTR, /* used for inline xattr */ FI_INLINE_DATA, /* used for inline data*/ + FI_INLINE_DENTRY, /* used for inline dentry */ FI_APPEND_WRITE, /* inode has appended data */ FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_VOLATILE_FILE, /* indicate volatile file */ + FI_DATA_EXIST, /* indicate data exists */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1087,15 +1139,6 @@ static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) set_inode_flag(fi, FI_ACL_MODE); } -static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) -{ - if (is_inode_flag_set(fi, FI_ACL_MODE)) { - clear_inode_flag(fi, FI_ACL_MODE); - return 1; - } - return 0; -} - static inline void get_inline_info(struct f2fs_inode_info *fi, struct f2fs_inode *ri) { @@ -1103,6 +1146,10 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, set_inode_flag(fi, FI_INLINE_XATTR); if (ri->i_inline & F2FS_INLINE_DATA) set_inode_flag(fi, FI_INLINE_DATA); + if (ri->i_inline & F2FS_INLINE_DENTRY) + set_inode_flag(fi, FI_INLINE_DENTRY); + if (ri->i_inline & F2FS_DATA_EXIST) + set_inode_flag(fi, FI_DATA_EXIST); } static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -1114,6 +1161,10 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, ri->i_inline |= F2FS_INLINE_XATTR; if (is_inode_flag_set(fi, FI_INLINE_DATA)) ri->i_inline |= F2FS_INLINE_DATA; + if (is_inode_flag_set(fi, FI_INLINE_DENTRY)) + ri->i_inline |= F2FS_INLINE_DENTRY; + if (is_inode_flag_set(fi, FI_DATA_EXIST)) + ri->i_inline |= F2FS_DATA_EXIST; } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -1148,6 +1199,17 @@ static inline int f2fs_has_inline_data(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); } +static inline void f2fs_clear_inline_inode(struct inode *inode) +{ + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + clear_inode_flag(F2FS_I(inode), FI_DATA_EXIST); +} + +static inline int f2fs_exist_data(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); +} + static inline bool f2fs_is_atomic_file(struct inode *inode) { return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); @@ -1164,6 +1226,23 @@ static inline void *inline_data_addr(struct page *page) return (void *)&(ri->i_addr[1]); } +static inline int f2fs_has_inline_dentry(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); +} + +static inline void *inline_dentry_addr(struct page *page) +{ + struct f2fs_inode *ri = F2FS_INODE(page); + return (void *)&(ri->i_addr[1]); +} + +static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) +{ + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; @@ -1224,6 +1303,19 @@ struct dentry *f2fs_get_parent(struct dentry *child); /* * dir.c */ +extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; +void set_de_type(struct f2fs_dir_entry *, struct inode *); +struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, + struct f2fs_dentry_ptr *); +bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, + unsigned int); +void do_make_empty_dir(struct inode *, struct inode *, + struct f2fs_dentry_ptr *); +struct page *init_inode_metadata(struct inode *, struct inode *, + const struct qstr *, struct page *); +void update_parent_metadata(struct inode *, struct inode *, unsigned int); +int room_for_filename(const void *, int, int); +void f2fs_drop_nlink(struct inode *, struct inode *, struct page *); struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, struct page **); struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); @@ -1232,7 +1324,8 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, const struct qstr *); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); -void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, + struct inode *); int f2fs_do_tmpfile(struct inode *, struct inode *); int f2fs_make_empty(struct inode *, struct inode *); bool f2fs_empty_dir(struct inode *); @@ -1296,6 +1389,7 @@ void destroy_node_manager_caches(void); * segment.c */ void register_inmem_page(struct inode *, struct page *); +void invalidate_inmem_page(struct inode *, struct page *); void commit_inmem_pages(struct inode *, bool); void f2fs_balance_fs(struct f2fs_sb_info *); void f2fs_balance_fs_bg(struct f2fs_sb_info *); @@ -1337,8 +1431,8 @@ void destroy_segment_manager_caches(void); */ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t); int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int); +void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); @@ -1405,7 +1499,7 @@ struct f2fs_stat_info { int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; int nats, sits, fnids; int total_count, utilization; - int bg_gc, inline_inode; + int bg_gc, inline_inode, inline_dir, inmem_pages; unsigned int valid_count, valid_node_count, valid_inode_count; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; @@ -1438,14 +1532,23 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_I_SB(inode))->inline_inode++); \ + (atomic_inc(&F2FS_I_SB(inode)->inline_inode)); \ } while (0) #define stat_dec_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_I_SB(inode))->inline_inode--); \ + (atomic_dec(&F2FS_I_SB(inode)->inline_inode)); \ + } while (0) +#define stat_inc_inline_dir(inode) \ + do { \ + if (f2fs_has_inline_dentry(inode)) \ + (atomic_inc(&F2FS_I_SB(inode)->inline_dir)); \ + } while (0) +#define stat_dec_inline_dir(inode) \ + do { \ + if (f2fs_has_inline_dentry(inode)) \ + (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) - #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ @@ -1492,6 +1595,8 @@ void f2fs_destroy_root_stats(void); #define stat_inc_read_hit(sb) #define stat_inc_inline_inode(inode) #define stat_dec_inline_inode(inode) +#define stat_inc_inline_dir(inode) +#define stat_dec_inline_dir(inode) #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) #define stat_inc_seg_count(si, type) @@ -1519,9 +1624,20 @@ extern const struct inode_operations f2fs_special_inode_operations; * inline.c */ bool f2fs_may_inline(struct inode *); +void read_inline_data(struct page *, struct page *); int f2fs_read_inline_data(struct inode *, struct page *); -int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *); -int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); -void truncate_inline_data(struct inode *, u64); +int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); +int f2fs_convert_inline_inode(struct inode *); +int f2fs_write_inline_data(struct inode *, struct page *); +void truncate_inline_data(struct page *, u64); bool recover_inline_data(struct inode *, struct page *); +struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, + struct page **); +struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); +int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); +int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); +void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, + struct inode *, struct inode *); +bool f2fs_empty_inline_dir(struct inode *); +int f2fs_read_inline_dir(struct file *, struct dir_context *); #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8e68bb64f83..3c27e0ecb3b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -41,18 +41,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, sb_start_pagefault(inode->i_sb); - /* force to convert with normal data indices */ - err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page); - if (err) - goto out; + f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); /* block allocation */ f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_reserve_block(&dn, page->index); - f2fs_unlock_op(sbi); - if (err) + if (err) { + f2fs_unlock_op(sbi); goto out; + } + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); file_update_time(vma->vm_file); lock_page(page); @@ -130,10 +130,45 @@ static inline bool need_do_checkpoint(struct inode *inode) need_cp = true; else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) need_cp = true; + else if (test_opt(sbi, FASTBOOT)) + need_cp = true; + else if (sbi->active_logs == 2) + need_cp = true; return need_cp; } +static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct page *i = find_get_page(NODE_MAPPING(sbi), ino); + bool ret = false; + /* But we need to avoid that there are some inode updates */ + if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) + ret = true; + f2fs_put_page(i, 0); + return ret; +} + +static void try_to_fix_pino(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + nid_t pino; + + down_write(&fi->i_sem); + fi->xattr_ver = 0; + if (file_wrong_pino(inode) && inode->i_nlink == 1 && + get_parent_ino(inode, &pino)) { + fi->i_pino = pino; + file_got_pino(inode); + up_write(&fi->i_sem); + + mark_inode_dirty_sync(inode); + f2fs_write_inode(inode, NULL); + } else { + up_write(&fi->i_sem); + } +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -164,19 +199,21 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) return ret; } + /* if the inode is dirty, let's recover all the time */ + if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) { + update_inode_page(inode); + goto go_write; + } + /* * if there is no written data, don't waste time to write recovery info. */ if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && !exist_written_data(sbi, ino, APPEND_INO)) { - struct page *i = find_get_page(NODE_MAPPING(sbi), ino); - /* But we need to avoid that there are some inode updates */ - if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) { - f2fs_put_page(i, 0); + /* it may call write_inode just prior to fsync */ + if (need_inode_page_update(sbi, ino)) goto go_write; - } - f2fs_put_page(i, 0); if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || exist_written_data(sbi, ino, UPDATE_INO)) @@ -196,49 +233,36 @@ go_write: up_read(&fi->i_sem); if (need_cp) { - nid_t pino; - /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); - down_write(&fi->i_sem); - F2FS_I(inode)->xattr_ver = 0; - if (file_wrong_pino(inode) && inode->i_nlink == 1 && - get_parent_ino(inode, &pino)) { - F2FS_I(inode)->i_pino = pino; - file_got_pino(inode); - up_write(&fi->i_sem); - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - } else { - up_write(&fi->i_sem); - } - } else { + /* + * We've secured consistency through sync_fs. Following pino + * will be used only for fsynced inodes after checkpoint. + */ + try_to_fix_pino(inode); + goto out; + } sync_nodes: - sync_node_pages(sbi, ino, &wbc); - - if (need_inode_block_update(sbi, ino)) { - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - goto sync_nodes; - } + sync_node_pages(sbi, ino, &wbc); - ret = wait_on_node_pages_writeback(sbi, ino); - if (ret) - goto out; + if (need_inode_block_update(sbi, ino)) { + mark_inode_dirty_sync(inode); + f2fs_write_inode(inode, NULL); + goto sync_nodes; + } + + ret = wait_on_node_pages_writeback(sbi, ino); + if (ret) + goto out; - /* once recovery info is written, don't need to tack this */ - remove_dirty_inode(sbi, ino, APPEND_INO); - clear_inode_flag(fi, FI_APPEND_WRITE); + /* once recovery info is written, don't need to tack this */ + remove_dirty_inode(sbi, ino, APPEND_INO); + clear_inode_flag(fi, FI_APPEND_WRITE); flush_out: - remove_dirty_inode(sbi, ino, UPDATE_INO); - clear_inode_flag(fi, FI_UPDATE_WRITE); - ret = f2fs_issue_flush(F2FS_I_SB(inode)); - } + remove_dirty_inode(sbi, ino, UPDATE_INO); + clear_inode_flag(fi, FI_UPDATE_WRITE); + ret = f2fs_issue_flush(sbi); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); return ret; @@ -296,7 +320,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) goto fail; /* handle inline data case */ - if (f2fs_has_inline_data(inode)) { + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) { if (whence == SEEK_HOLE) data_ofs = isize; goto found; @@ -374,6 +398,15 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { + struct inode *inode = file_inode(file); + + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + int err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } + file_accessed(file); vma->vm_ops = &f2fs_file_vm_ops; return 0; @@ -415,20 +448,17 @@ void truncate_data_blocks(struct dnode_of_data *dn) truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); } -static void truncate_partial_data_page(struct inode *inode, u64 from) +static int truncate_partial_data_page(struct inode *inode, u64 from) { unsigned offset = from & (PAGE_CACHE_SIZE - 1); struct page *page; - if (f2fs_has_inline_data(inode)) - return truncate_inline_data(inode, from); - if (!offset) - return; + return 0; page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); if (IS_ERR(page)) - return; + return 0; lock_page(page); if (unlikely(!PageUptodate(page) || @@ -438,9 +468,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) f2fs_wait_on_page_writeback(page, DATA); zero_user(page, offset, PAGE_CACHE_SIZE - offset); set_page_dirty(page); - out: f2fs_put_page(page, 1); + return 0; } int truncate_blocks(struct inode *inode, u64 from, bool lock) @@ -450,27 +480,33 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; + struct page *ipage; trace_f2fs_truncate_blocks_enter(inode, from); - if (f2fs_has_inline_data(inode)) - goto done; - free_from = (pgoff_t) - ((from + blocksize - 1) >> (sbi->log_blocksize)); + ((from + blocksize - 1) >> (sbi->log_blocksize)); if (lock) f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; + } + + if (f2fs_has_inline_data(inode)) { + f2fs_put_page(ipage, 1); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; - if (lock) - f2fs_unlock_op(sbi); - trace_f2fs_truncate_blocks_exit(inode, err); - return err; + goto out; } count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); @@ -486,11 +522,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); +out: if (lock) f2fs_unlock_op(sbi); -done: + /* lastly zero out the first data page */ - truncate_partial_data_page(inode, from); + if (!err) + err = truncate_partial_data_page(inode, from); trace_f2fs_truncate_blocks_exit(inode, err); return err; @@ -504,6 +542,12 @@ void f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); + /* we should check inline_data size */ + if (f2fs_has_inline_data(inode) && !f2fs_may_inline(inode)) { + if (f2fs_convert_inline_inode(inode)) + return; + } + if (!truncate_blocks(inode, i_size_read(inode), true)) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); @@ -561,10 +605,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (attr->ia_valid & ATTR_SIZE) { - err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); - if (err) - return err; - if (attr->ia_size != i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); f2fs_truncate(inode); @@ -665,9 +705,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) if (offset >= inode->i_size) return ret; - ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); - if (ret) - return ret; + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -721,9 +763,11 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; - ret = f2fs_convert_inline_data(inode, offset + len, NULL); - if (ret) - return ret; + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -874,7 +918,15 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); - return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); + return f2fs_convert_inline_inode(inode); +} + +static int f2fs_release_file(struct inode *inode, struct file *filp) +{ + /* some remained atomic pages should discarded */ + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + commit_inmem_pages(inode, true); + return 0; } static int f2fs_ioc_commit_atomic_write(struct file *filp) @@ -908,7 +960,8 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) return -EACCES; set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - return 0; + + return f2fs_convert_inline_inode(inode); } static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) @@ -985,6 +1038,7 @@ const struct file_operations f2fs_file_operations = { .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .open = generic_file_open, + .release = f2fs_release_file, .mmap = f2fs_file_mmap, .fsync = f2fs_sync_file, .fallocate = f2fs_fallocate, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2a8f4acdb86..eec0933a481 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -96,8 +96,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi) dev_t dev = sbi->sb->s_bdev->bd_dev; int err = 0; - if (!test_opt(sbi, BG_GC)) - goto out; gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); if (!gc_th) { err = -ENOMEM; @@ -340,34 +338,39 @@ static const struct victim_selection default_v_ops = { .get_victim = get_victim_by_default, }; -static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) +static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino) { struct inode_entry *ie; - list_for_each_entry(ie, ilist, list) - if (ie->inode->i_ino == ino) - return ie->inode; + ie = radix_tree_lookup(&gc_list->iroot, ino); + if (ie) + return ie->inode; return NULL; } -static void add_gc_inode(struct inode *inode, struct list_head *ilist) +static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) { struct inode_entry *new_ie; - if (inode == find_gc_inode(inode->i_ino, ilist)) { + if (inode == find_gc_inode(gc_list, inode->i_ino)) { iput(inode); return; } - new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); new_ie->inode = inode; - list_add_tail(&new_ie->list, ilist); +retry: + if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { + cond_resched(); + goto retry; + } + list_add_tail(&new_ie->list, &gc_list->ilist); } -static void put_gc_inode(struct list_head *ilist) +static void put_gc_inode(struct gc_inode_list *gc_list) { struct inode_entry *ie, *next_ie; - list_for_each_entry_safe(ie, next_ie, ilist, list) { + list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) { + radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); iput(ie->inode); list_del(&ie->list); kmem_cache_free(winode_slab, ie); @@ -553,7 +556,7 @@ out: * the victim data block is ignored. */ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, - struct list_head *ilist, unsigned int segno, int gc_type) + struct gc_inode_list *gc_list, unsigned int segno, int gc_type) { struct super_block *sb = sbi->sb; struct f2fs_summary *entry; @@ -605,27 +608,27 @@ next_step: data_page = find_data_page(inode, start_bidx + ofs_in_node, false); - if (IS_ERR(data_page)) - goto next_iput; + if (IS_ERR(data_page)) { + iput(inode); + continue; + } f2fs_put_page(data_page, 0); - add_gc_inode(inode, ilist); - } else { - inode = find_gc_inode(dni.ino, ilist); - if (inode) { - start_bidx = start_bidx_of_node(nofs, - F2FS_I(inode)); - data_page = get_lock_data_page(inode, + add_gc_inode(gc_list, inode); + continue; + } + + /* phase 3 */ + inode = find_gc_inode(gc_list, dni.ino); + if (inode) { + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); + data_page = get_lock_data_page(inode, start_bidx + ofs_in_node); - if (IS_ERR(data_page)) - continue; - move_data_page(inode, data_page, gc_type); - stat_inc_data_blk_count(sbi, 1); - } + if (IS_ERR(data_page)) + continue; + move_data_page(inode, data_page, gc_type); + stat_inc_data_blk_count(sbi, 1); } - continue; -next_iput: - iput(inode); } if (++phase < 4) @@ -646,18 +649,20 @@ next_iput: } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, - int gc_type, int type) + int gc_type) { struct sit_info *sit_i = SIT_I(sbi); int ret; + mutex_lock(&sit_i->sentry_lock); - ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS); + ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, + NO_CHECK_TYPE, LFS); mutex_unlock(&sit_i->sentry_lock); return ret; } static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, - struct list_head *ilist, int gc_type) + struct gc_inode_list *gc_list, int gc_type) { struct page *sum_page; struct f2fs_summary_block *sum; @@ -675,7 +680,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, gc_node_segment(sbi, sum->entries, segno, gc_type); break; case SUM_TYPE_DATA: - gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); + gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type); break; } blk_finish_plug(&plug); @@ -688,16 +693,18 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, int f2fs_gc(struct f2fs_sb_info *sbi) { - struct list_head ilist; unsigned int segno, i; int gc_type = BG_GC; int nfree = 0; int ret = -1; - struct cp_control cpc = { - .reason = CP_SYNC, + struct cp_control cpc; + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(GFP_NOFS), }; - INIT_LIST_HEAD(&ilist); + cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC; + gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; @@ -709,7 +716,7 @@ gc_more: write_checkpoint(sbi, &cpc); } - if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) + if (!__get_victim(sbi, &segno, gc_type)) goto stop; ret = 0; @@ -719,7 +726,7 @@ gc_more: META_SSA); for (i = 0; i < sbi->segs_per_sec; i++) - do_garbage_collect(sbi, segno + i, &ilist, gc_type); + do_garbage_collect(sbi, segno + i, &gc_list, gc_type); if (gc_type == FG_GC) { sbi->cur_victim_sec = NULL_SEGNO; @@ -735,7 +742,7 @@ gc_more: stop: mutex_unlock(&sbi->gc_mutex); - put_gc_inode(&ilist); + put_gc_inode(&gc_list); return ret; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 16f0b2b2299..6ff7ad38463 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -40,6 +40,11 @@ struct inode_entry { struct inode *inode; }; +struct gc_inode_list { + struct list_head ilist; + struct radix_tree_root iroot; +}; + /* * inline functions */ diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 88036fd7579..f2d3c581e77 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -15,35 +15,44 @@ bool f2fs_may_inline(struct inode *inode) { - block_t nr_blocks; - loff_t i_size; - if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) return false; if (f2fs_is_atomic_file(inode)) return false; - nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; - if (inode->i_blocks > nr_blocks) + if (!S_ISREG(inode->i_mode)) return false; - i_size = i_size_read(inode); - if (i_size > MAX_INLINE_DATA) + if (i_size_read(inode) > MAX_INLINE_DATA) return false; return true; } -int f2fs_read_inline_data(struct inode *inode, struct page *page) +void read_inline_data(struct page *page, struct page *ipage) { - struct page *ipage; void *src_addr, *dst_addr; - if (page->index) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); - goto out; - } + if (PageUptodate(page)) + return; + + f2fs_bug_on(F2FS_P_SB(page), page->index); + + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); + + /* Copy the whole inline data block */ + src_addr = inline_data_addr(ipage); + dst_addr = kmap_atomic(page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + flush_dcache_page(page); + kunmap_atomic(dst_addr); + SetPageUptodate(page); +} + +int f2fs_read_inline_data(struct inode *inode, struct page *page) +{ + struct page *ipage; ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) { @@ -51,112 +60,116 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) return PTR_ERR(ipage); } - zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); + if (!f2fs_has_inline_data(inode)) { + f2fs_put_page(ipage, 1); + return -EAGAIN; + } - /* Copy the whole inline data block */ - src_addr = inline_data_addr(ipage); - dst_addr = kmap(page); - memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - kunmap(page); - f2fs_put_page(ipage, 1); + if (page->index) + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + else + read_inline_data(page, ipage); -out: SetPageUptodate(page); + f2fs_put_page(ipage, 1); unlock_page(page); - return 0; } -static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) +int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { - int err = 0; - struct page *ipage; - struct dnode_of_data dn; void *src_addr, *dst_addr; block_t new_blk_addr; - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_io_info fio = { .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, }; + int dirty, err; - f2fs_lock_op(sbi); - ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); - goto out; - } + f2fs_bug_on(F2FS_I_SB(dn->inode), page->index); - /* someone else converted inline_data already */ - if (!f2fs_has_inline_data(inode)) - goto out; + if (!f2fs_exist_data(dn->inode)) + goto clear_out; - /* - * i_addr[0] is not used for inline data, - * so reserving new block will not destroy inline data - */ - set_new_dnode(&dn, inode, ipage, NULL, 0); - err = f2fs_reserve_block(&dn, 0); + err = f2fs_reserve_block(dn, 0); if (err) - goto out; + return err; f2fs_wait_on_page_writeback(page, DATA); + + if (PageUptodate(page)) + goto no_update; + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); /* Copy the whole inline data block */ - src_addr = inline_data_addr(ipage); - dst_addr = kmap(page); + src_addr = inline_data_addr(dn->inode_page); + dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - kunmap(page); + flush_dcache_page(page); + kunmap_atomic(dst_addr); SetPageUptodate(page); +no_update: + /* clear dirty state */ + dirty = clear_page_dirty_for_io(page); /* write data page to try to make data consistent */ set_page_writeback(page); - write_data_page(page, &dn, &new_blk_addr, &fio); - update_extent_cache(new_blk_addr, &dn); + + write_data_page(page, dn, &new_blk_addr, &fio); + update_extent_cache(new_blk_addr, dn); f2fs_wait_on_page_writeback(page, DATA); + if (dirty) + inode_dec_dirty_pages(dn->inode); - /* clear inline data and flag after data writeback */ - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_dec_inline_inode(inode); + /* this converted inline_data should be recovered. */ + set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); - sync_inode_page(&dn); - f2fs_put_dnode(&dn); -out: - f2fs_unlock_op(sbi); - return err; + /* clear inline data and flag after data writeback */ + truncate_inline_data(dn->inode_page, 0); +clear_out: + stat_dec_inline_inode(dn->inode); + f2fs_clear_inline_inode(dn->inode); + sync_inode_page(dn); + f2fs_put_dnode(dn); + return 0; } -int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size, - struct page *page) +int f2fs_convert_inline_inode(struct inode *inode) { - struct page *new_page = page; - int err; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + struct page *ipage, *page; + int err = 0; - if (!f2fs_has_inline_data(inode)) - return 0; - else if (to_size <= MAX_INLINE_DATA) - return 0; + page = grab_cache_page(inode->i_mapping, 0); + if (!page) + return -ENOMEM; + + f2fs_lock_op(sbi); - if (!page || page->index != 0) { - new_page = grab_cache_page(inode->i_mapping, 0); - if (!new_page) - return -ENOMEM; + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; } - err = __f2fs_convert_inline_data(inode, new_page); - if (!page || page->index != 0) - f2fs_put_page(new_page, 1); + set_new_dnode(&dn, inode, ipage, ipage, 0); + + if (f2fs_has_inline_data(inode)) + err = f2fs_convert_inline_page(&dn, page); + + f2fs_put_dnode(&dn); +out: + f2fs_unlock_op(sbi); + + f2fs_put_page(page, 1); return err; } -int f2fs_write_inline_data(struct inode *inode, - struct page *page, unsigned size) +int f2fs_write_inline_data(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; - struct page *ipage; struct dnode_of_data dn; int err; @@ -164,47 +177,39 @@ int f2fs_write_inline_data(struct inode *inode, err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); if (err) return err; - ipage = dn.inode_page; - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - src_addr = kmap(page); - dst_addr = inline_data_addr(ipage); - memcpy(dst_addr, src_addr, size); - kunmap(page); - - /* Release the first data block if it is allocated */ if (!f2fs_has_inline_data(inode)) { - truncate_data_blocks_range(&dn, 1); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_inc_inline_inode(inode); + f2fs_put_dnode(&dn); + return -EAGAIN; } + f2fs_bug_on(F2FS_I_SB(inode), page->index); + + f2fs_wait_on_page_writeback(dn.inode_page, NODE); + src_addr = kmap_atomic(page); + dst_addr = inline_data_addr(dn.inode_page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + kunmap_atomic(src_addr); + set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); f2fs_put_dnode(&dn); - return 0; } -void truncate_inline_data(struct inode *inode, u64 from) +void truncate_inline_data(struct page *ipage, u64 from) { - struct page *ipage; + void *addr; if (from >= MAX_INLINE_DATA) return; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return; - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET + from, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - set_page_dirty(ipage); - f2fs_put_page(ipage, 1); + addr = inline_data_addr(ipage); + memset(addr + from, 0, MAX_INLINE_DATA - from); } bool recover_inline_data(struct inode *inode, struct page *npage) @@ -236,6 +241,10 @@ process_inline: src_addr = inline_data_addr(npage); dst_addr = inline_data_addr(ipage); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + update_inode(inode, ipage); f2fs_put_page(ipage, 1); return true; @@ -244,16 +253,279 @@ process_inline: if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + truncate_inline_data(ipage, 0); + f2fs_clear_inline_inode(inode); update_inode(inode, ipage); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { truncate_blocks(inode, 0, false); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); goto process_inline; } return false; } + +struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, + struct qstr *name, struct page **res_page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct f2fs_inline_dentry *inline_dentry; + struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; + struct page *ipage; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return NULL; + + inline_dentry = inline_data_addr(ipage); + + make_dentry_ptr(&d, (void *)inline_dentry, 2); + de = find_target_dentry(name, NULL, &d); + + unlock_page(ipage); + if (de) + *res_page = ipage; + else + f2fs_put_page(ipage, 0); + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs has occurred. + */ + f2fs_bug_on(sbi, d.max < 0); + return de; +} + +struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir, + struct page **p) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + struct f2fs_dir_entry *de; + struct f2fs_inline_dentry *dentry_blk; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return NULL; + + dentry_blk = inline_data_addr(ipage); + de = &dentry_blk->dentry[1]; + *p = ipage; + unlock_page(ipage); + return de; +} + +int make_empty_inline_dir(struct inode *inode, struct inode *parent, + struct page *ipage) +{ + struct f2fs_inline_dentry *dentry_blk; + struct f2fs_dentry_ptr d; + + dentry_blk = inline_data_addr(ipage); + + make_dentry_ptr(&d, (void *)dentry_blk, 2); + do_make_empty_dir(inode, parent, &d); + + set_page_dirty(ipage); + + /* update i_size to MAX_INLINE_DATA */ + if (i_size_read(inode) < MAX_INLINE_DATA) { + i_size_write(inode, MAX_INLINE_DATA); + set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); + } + return 0; +} + +static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, + struct f2fs_inline_dentry *inline_dentry) +{ + struct page *page; + struct dnode_of_data dn; + struct f2fs_dentry_block *dentry_blk; + int err; + + page = grab_cache_page(dir->i_mapping, 0); + if (!page) + return -ENOMEM; + + set_new_dnode(&dn, dir, ipage, NULL, 0); + err = f2fs_reserve_block(&dn, 0); + if (err) + goto out; + + f2fs_wait_on_page_writeback(page, DATA); + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + + dentry_blk = kmap_atomic(page); + + /* copy data from inline dentry block to new dentry block */ + memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, + INLINE_DENTRY_BITMAP_SIZE); + memcpy(dentry_blk->dentry, inline_dentry->dentry, + sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY); + memcpy(dentry_blk->filename, inline_dentry->filename, + NR_INLINE_DENTRY * F2FS_SLOT_LEN); + + kunmap_atomic(dentry_blk); + SetPageUptodate(page); + set_page_dirty(page); + + /* clear inline dir and flag after data writeback */ + truncate_inline_data(ipage, 0); + + stat_dec_inline_dir(dir); + clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); + + if (i_size_read(dir) < PAGE_CACHE_SIZE) { + i_size_write(dir, PAGE_CACHE_SIZE); + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } + + sync_inode_page(&dn); +out: + f2fs_put_page(page, 1); + return err; +} + +int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, + struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + unsigned int bit_pos; + f2fs_hash_t name_hash; + struct f2fs_dir_entry *de; + size_t namelen = name->len; + struct f2fs_inline_dentry *dentry_blk = NULL; + int slots = GET_DENTRY_SLOTS(namelen); + struct page *page; + int err = 0; + int i; + + name_hash = f2fs_dentry_hash(name); + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + dentry_blk = inline_data_addr(ipage); + bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + slots, NR_INLINE_DENTRY); + if (bit_pos >= NR_INLINE_DENTRY) { + err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); + if (!err) + err = -EAGAIN; + goto out; + } + + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name, ipage); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } + + f2fs_wait_on_page_writeback(ipage, NODE); + de = &dentry_blk->dentry[bit_pos]; + de->hash_code = name_hash; + de->name_len = cpu_to_le16(namelen); + memcpy(dentry_blk->filename[bit_pos], name->name, name->len); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + for (i = 0; i < slots; i++) + test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + set_page_dirty(ipage); + + /* we don't need to mark_inode_dirty now */ + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + + update_parent_metadata(dir, inode, 0); +fail: + up_write(&F2FS_I(inode)->i_sem); + + if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { + update_inode(dir, ipage); + clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } +out: + f2fs_put_page(ipage, 1); + return err; +} + +void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *dir, struct inode *inode) +{ + struct f2fs_inline_dentry *inline_dentry; + int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + unsigned int bit_pos; + int i; + + lock_page(page); + f2fs_wait_on_page_writeback(page, NODE); + + inline_dentry = inline_data_addr(page); + bit_pos = dentry - inline_dentry->dentry; + for (i = 0; i < slots; i++) + test_and_clear_bit_le(bit_pos + i, + &inline_dentry->dentry_bitmap); + + set_page_dirty(page); + + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + + if (inode) + f2fs_drop_nlink(dir, inode, page); + + f2fs_put_page(page, 1); +} + +bool f2fs_empty_inline_dir(struct inode *dir) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + unsigned int bit_pos = 2; + struct f2fs_inline_dentry *dentry_blk; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return false; + + dentry_blk = inline_data_addr(ipage); + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_INLINE_DENTRY, + bit_pos); + + f2fs_put_page(ipage, 1); + + if (bit_pos < NR_INLINE_DENTRY) + return false; + + return true; +} + +int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx) +{ + struct inode *inode = file_inode(file); + struct f2fs_inline_dentry *inline_dentry = NULL; + struct page *ipage = NULL; + struct f2fs_dentry_ptr d; + + if (ctx->pos == NR_INLINE_DENTRY) + return 0; + + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + inline_dentry = inline_data_addr(ipage); + + make_dentry_ptr(&d, (void *)inline_dentry, 2); + + if (!f2fs_fill_dentries(ctx, &d, 0)) + ctx->pos = NR_INLINE_DENTRY; + + f2fs_put_page(ipage, 1); + return 0; +} diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0deead4505e..196cc7843aa 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -67,12 +67,38 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } +static int __recover_inline_status(struct inode *inode, struct page *ipage) +{ + void *inline_data = inline_data_addr(ipage); + struct f2fs_inode *ri; + void *zbuf; + + zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS); + if (!zbuf) + return -ENOMEM; + + if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) { + kfree(zbuf); + return 0; + } + kfree(zbuf); + + f2fs_wait_on_page_writeback(ipage, NODE); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + + ri = F2FS_INODE(ipage); + set_raw_inline(F2FS_I(inode), ri); + set_page_dirty(ipage); + return 0; +} + static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct page *node_page; struct f2fs_inode *ri; + int err = 0; /* Check if ino is within scope */ if (check_nid_range(sbi, inode->i_ino)) { @@ -114,11 +140,19 @@ static int do_read_inode(struct inode *inode) get_extent_info(&fi->ext, ri->i_ext); get_inline_info(fi, ri); + /* check data exist */ + if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) + err = __recover_inline_status(inode, node_page); + /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); f2fs_put_page(node_page, 1); - return 0; + + stat_inc_inline_inode(inode); + stat_inc_inline_dir(inode); + + return err; } struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) @@ -156,7 +190,7 @@ make_now: inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; @@ -295,11 +329,12 @@ void f2fs_evict_inode(struct inode *inode) f2fs_lock_op(sbi); remove_inode_page(inode); - stat_dec_inline_inode(inode); f2fs_unlock_op(sbi); sb_end_intwrite(inode->i_sb); no_delete: + stat_dec_inline_dir(inode); + stat_dec_inline_inode(inode); invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); @@ -325,8 +360,9 @@ void handle_failed_inode(struct inode *inode) f2fs_truncate(inode); remove_inode_page(inode); - stat_dec_inline_inode(inode); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); alloc_nid_failed(sbi, inode->i_ino); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0d2526e5aa1..547a2deeb1a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -54,6 +54,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_free = true; goto out; } + + if (f2fs_may_inline(inode)) + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode)) + set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); + trace_f2fs_new_inode(inode, 0); mark_inode_dirty(inode); return inode; @@ -129,8 +135,12 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); + stat_inc_inline_inode(inode); d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: handle_failed_inode(inode); @@ -157,6 +167,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, f2fs_unlock_op(sbi); d_instantiate(dentry, inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); @@ -187,14 +200,12 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, de = f2fs_find_entry(dir, &dentry->d_name, &page); if (de) { nid_t ino = le32_to_cpu(de->ino); - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); - - stat_inc_inline_inode(inode); } return d_splice_alias(inode, dentry); @@ -219,15 +230,18 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) err = acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); goto fail; } - f2fs_delete_entry(de, page, inode); + f2fs_delete_entry(de, page, dir, inode); f2fs_unlock_op(sbi); /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); fail: trace_f2fs_unlink_exit(inode, err); return err; @@ -261,6 +275,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return err; out: handle_failed_inode(inode); @@ -291,11 +308,14 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; f2fs_unlock_op(sbi); + stat_inc_inline_dir(inode); alloc_nid_done(sbi, inode->i_ino); d_instantiate(dentry, inode); unlock_new_inode(inode); + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out_fail: @@ -338,8 +358,12 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, f2fs_unlock_op(sbi); alloc_nid_done(sbi, inode->i_ino); + d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: handle_failed_inode(inode); @@ -435,7 +459,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_inode->i_ctime = CURRENT_TIME; mark_inode_dirty(old_inode); - f2fs_delete_entry(old_entry, old_page, NULL); + f2fs_delete_entry(old_entry, old_page, old_dir, NULL); if (old_dir_entry) { if (old_dir != new_dir) { @@ -443,7 +467,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir_page, new_dir); update_inode_page(old_inode); } else { - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } drop_nlink(old_dir); @@ -452,19 +476,22 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_unlock_op(sbi); + + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; put_out_dir: f2fs_unlock_op(sbi); - kunmap(new_page); + f2fs_dentry_kunmap(new_dir, new_page); f2fs_put_page(new_page, 0); out_dir: if (old_dir_entry) { - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } out_old: - kunmap(old_page); + f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; @@ -588,6 +615,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, update_inode_page(new_dir); f2fs_unlock_op(sbi); + + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out_undo: /* Still we may fail to recover name info of f2fs_inode here */ @@ -596,19 +626,19 @@ out_unlock: f2fs_unlock_op(sbi); out_new_dir: if (new_dir_entry) { - kunmap(new_dir_page); + f2fs_dentry_kunmap(new_inode, new_dir_page); f2fs_put_page(new_dir_page, 0); } out_old_dir: if (old_dir_entry) { - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } out_new: - kunmap(new_page); + f2fs_dentry_kunmap(new_dir, new_page); f2fs_put_page(new_page, 0); out_old: - kunmap(old_page); + f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 44b8afef43d..f83326ca32e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -31,22 +31,38 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct sysinfo val; + unsigned long avail_ram; unsigned long mem_size = 0; bool res = false; si_meminfo(&val); - /* give 25%, 25%, 50% memory for each components respectively */ + + /* only uses low memory */ + avail_ram = val.totalram - val.totalhigh; + + /* give 25%, 25%, 50%, 50% memory for each components respectively */ if (type == FREE_NIDS) { - mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12; - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> + PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { - mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12; - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> + PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == DIRTY_DENTS) { if (sbi->sb->s_bdi->dirty_exceeded) return false; mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1); + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else if (type == INO_ENTRIES) { + int i; + + if (sbi->sb->s_bdi->dirty_exceeded) + return false; + for (i = 0; i <= UPDATE_INO; i++) + mem_size += (sbi->im[i].ino_num * + sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } return res; } @@ -131,7 +147,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, if (get_nat_flag(ne, IS_DIRTY)) return; -retry: + head = radix_tree_lookup(&nm_i->nat_set_root, set); if (!head) { head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); @@ -140,11 +156,7 @@ retry: INIT_LIST_HEAD(&head->set_list); head->set = set; head->entry_cnt = 0; - - if (radix_tree_insert(&nm_i->nat_set_root, set, head)) { - cond_resched(); - goto retry; - } + f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } list_move_tail(&ne->list, &head->entry_list); nm_i->dirty_nat_cnt++; @@ -155,7 +167,7 @@ retry: static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, struct nat_entry *ne) { - nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK; + nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; head = radix_tree_lookup(&nm_i->nat_set_root, set); @@ -180,11 +192,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -194,11 +206,11 @@ bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool fsynced = false; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_FSYNCED_INODE)) fsynced = true; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return fsynced; } @@ -208,13 +220,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return need_update; } @@ -222,13 +234,8 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) { struct nat_entry *new; - new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); - if (!new) - return NULL; - if (radix_tree_insert(&nm_i->nat_root, nid, new)) { - kmem_cache_free(nat_entry_slab, new); - return NULL; - } + new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); + f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); memset(new, 0, sizeof(struct nat_entry)); nat_set_nid(new, nid); nat_reset_flag(new); @@ -241,18 +248,14 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, struct f2fs_nat_entry *ne) { struct nat_entry *e; -retry: - write_lock(&nm_i->nat_tree_lock); + + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) { e = grab_nat_entry(nm_i, nid); - if (!e) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; - } node_info_from_raw_nat(&e->ni, ne); } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, @@ -260,15 +263,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; -retry: - write_lock(&nm_i->nat_tree_lock); + + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); - if (!e) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; - } e->ni = *ni; f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -310,7 +309,7 @@ retry: set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -320,7 +319,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) if (available_free_memory(sbi, NAT_ENTRIES)) return 0; - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while (nr_shrink && !list_empty(&nm_i->nat_entries)) { struct nat_entry *ne; ne = list_first_entry(&nm_i->nat_entries, @@ -328,7 +327,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) __del_from_nat_cache(nm_i, ne); nr_shrink--; } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); return nr_shrink; } @@ -351,14 +350,14 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ni->nid = nid; /* Check nat cache */ - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); } - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); if (e) return; @@ -1298,16 +1297,22 @@ static int f2fs_write_node_page(struct page *page, return 0; } - if (wbc->for_reclaim) - goto redirty_out; - - down_read(&sbi->node_write); + if (wbc->for_reclaim) { + if (!down_read_trylock(&sbi->node_write)) + goto redirty_out; + } else { + down_read(&sbi->node_write); + } set_page_writeback(page); write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); up_read(&sbi->node_write); unlock_page(page); + + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, NODE, WRITE); + return 0; redirty_out: @@ -1410,13 +1415,13 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) if (build) { /* do not add allocated nids */ - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) allocated = true; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); if (allocated) return 0; } @@ -1425,15 +1430,22 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) i->nid = nid; i->state = NID_NEW; + if (radix_tree_preload(GFP_NOFS)) { + kmem_cache_free(free_nid_slab, i); + return 0; + } + spin_lock(&nm_i->free_nid_list_lock); if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) { spin_unlock(&nm_i->free_nid_list_lock); + radix_tree_preload_end(); kmem_cache_free(free_nid_slab, i); return 0; } list_add_tail(&i->list, &nm_i->free_nid_list); nm_i->fcnt++; spin_unlock(&nm_i->free_nid_list_lock); + radix_tree_preload_end(); return 1; } @@ -1804,21 +1816,15 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) nid_t nid = le32_to_cpu(nid_in_journal(sum, i)); raw_ne = nat_in_journal(sum, i); -retry: - write_lock(&nm_i->nat_tree_lock); - ne = __lookup_nat_cache(nm_i, nid); - if (ne) - goto found; - ne = grab_nat_entry(nm_i, nid); + down_write(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); if (!ne) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; + ne = grab_nat_entry(nm_i, nid); + node_info_from_raw_nat(&ne->ni, &raw_ne); } - node_info_from_raw_nat(&ne->ni, &raw_ne); -found: __set_nat_cache_dirty(nm_i, ne); - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } update_nats_in_cursum(sum, -i); mutex_unlock(&curseg->curseg_mutex); @@ -1889,10 +1895,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, } raw_nat_from_node_info(raw_ne, &ne->ni); - write_lock(&NM_I(sbi)->nat_tree_lock); + down_write(&NM_I(sbi)->nat_tree_lock); nat_reset_flag(ne); __clear_nat_cache_dirty(NM_I(sbi), ne); - write_unlock(&NM_I(sbi)->nat_tree_lock); + up_write(&NM_I(sbi)->nat_tree_lock); if (nat_get_blkaddr(ne) == NULL_ADDR) add_free_nid(sbi, nid, false); @@ -1903,10 +1909,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, else f2fs_put_page(page, 1); - if (!set->entry_cnt) { - radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); - kmem_cache_free(nat_entry_set_slab, set); - } + f2fs_bug_on(sbi, set->entry_cnt); + + radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); + kmem_cache_free(nat_entry_set_slab, set); } /* @@ -1923,6 +1929,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) nid_t set_idx = 0; LIST_HEAD(sets); + if (!nm_i->dirty_nat_cnt) + return; /* * if there are no enough space in journal to store dirty nat * entries, remove all entries from journal and merge them @@ -1931,9 +1939,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); - if (!nm_i->dirty_nat_cnt) - return; - while ((found = __gang_lookup_nat_set(nm_i, set_idx, NATVEC_SIZE, setvec))) { unsigned idx; @@ -1973,13 +1978,13 @@ static int init_node_manager(struct f2fs_sb_info *sbi) INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->free_nid_list); - INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); - INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC); + INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); + INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); INIT_LIST_HEAD(&nm_i->nat_entries); mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->free_nid_list_lock); - rwlock_init(&nm_i->nat_tree_lock); + init_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -2035,7 +2040,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -2044,7 +2049,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) __del_from_nat_cache(nm_i, natvec[idx]); } f2fs_bug_on(sbi, nm_i->nat_cnt); - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); sbi->nm_info = NULL; @@ -2061,17 +2066,17 @@ int __init create_node_manager_caches(void) free_nid_slab = f2fs_kmem_cache_create("free_nid", sizeof(struct free_nid)); if (!free_nid_slab) - goto destory_nat_entry; + goto destroy_nat_entry; nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", sizeof(struct nat_entry_set)); if (!nat_entry_set_slab) - goto destory_free_nid; + goto destroy_free_nid; return 0; -destory_free_nid: +destroy_free_nid: kmem_cache_destroy(free_nid_slab); -destory_nat_entry: +destroy_nat_entry: kmem_cache_destroy(nat_entry_slab); fail: return -ENOMEM; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 8d5e6e0dd84..d10b6448a67 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -106,7 +106,8 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, enum mem_type { FREE_NIDS, /* indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ - DIRTY_DENTS /* indicates dirty dentry pages */ + DIRTY_DENTS, /* indicates dirty dentry pages */ + INO_ENTRIES, /* indicates inode entries */ }; struct nat_entry_set { @@ -192,10 +193,7 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) { unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); - if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) - f2fs_clear_bit(block_off, nm_i->nat_bitmap); - else - f2fs_set_bit(block_off, nm_i->nat_bitmap); + f2fs_change_bit(block_off, nm_i->nat_bitmap); } static inline void fill_node_footer(struct page *page, nid_t nid, diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ebd01322578..9160a37e1c7 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -111,7 +111,7 @@ retry: iput(einode); goto out_unmap_put; } - f2fs_delete_entry(de, page, einode); + f2fs_delete_entry(de, page, dir, einode); iput(einode); goto retry; } @@ -129,7 +129,7 @@ retry: goto out; out_unmap_put: - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); out_err: iput(dir); @@ -170,13 +170,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + ra_meta_pages(sbi, blkaddr, 1, META_POR); + while (1) { struct fsync_inode_entry *entry; if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) return 0; - page = get_meta_page_ra(sbi, blkaddr); + page = get_meta_page(sbi, blkaddr); if (cp_ver != cpver_of_node(page)) break; @@ -227,6 +229,8 @@ next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); + + ra_meta_pages_cond(sbi, blkaddr); } f2fs_put_page(page, 1); return err; @@ -436,7 +440,9 @@ static int recover_data(struct f2fs_sb_info *sbi, if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) break; - page = get_meta_page_ra(sbi, blkaddr); + ra_meta_pages_cond(sbi, blkaddr); + + page = get_meta_page(sbi, blkaddr); if (cp_ver != cpver_of_node(page)) { f2fs_put_page(page, 1); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 923cb76fdc4..42607a67992 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -178,17 +178,47 @@ void register_inmem_page(struct inode *inode, struct page *page) { struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; + int err; + + SetPagePrivate(page); new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); /* add atomic page indices to the list */ new->page = page; INIT_LIST_HEAD(&new->list); - +retry: /* increase reference count with clean state */ mutex_lock(&fi->inmem_lock); + err = radix_tree_insert(&fi->inmem_root, page->index, new); + if (err == -EEXIST) { + mutex_unlock(&fi->inmem_lock); + kmem_cache_free(inmem_entry_slab, new); + return; + } else if (err) { + mutex_unlock(&fi->inmem_lock); + goto retry; + } get_page(page); list_add_tail(&new->list, &fi->inmem_pages); + inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); + mutex_unlock(&fi->inmem_lock); +} + +void invalidate_inmem_page(struct inode *inode, struct page *page) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct inmem_pages *cur; + + mutex_lock(&fi->inmem_lock); + cur = radix_tree_lookup(&fi->inmem_root, page->index); + if (cur) { + radix_tree_delete(&fi->inmem_root, cur->page->index); + f2fs_put_page(cur->page, 0); + list_del(&cur->list); + kmem_cache_free(inmem_entry_slab, cur); + dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); + } mutex_unlock(&fi->inmem_lock); } @@ -203,7 +233,16 @@ void commit_inmem_pages(struct inode *inode, bool abort) .rw = WRITE_SYNC, }; - f2fs_balance_fs(sbi); + /* + * The abort is true only when f2fs_evict_inode is called. + * Basically, the f2fs_evict_inode doesn't produce any data writes, so + * that we don't need to call f2fs_balance_fs. + * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this + * inode becomes free by iget_locked in f2fs_iget. + */ + if (!abort) + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); mutex_lock(&fi->inmem_lock); @@ -216,9 +255,11 @@ void commit_inmem_pages(struct inode *inode, bool abort) do_write_data_page(cur->page, &fio); submit_bio = true; } + radix_tree_delete(&fi->inmem_root, cur->page->index); f2fs_put_page(cur->page, 1); list_del(&cur->list); kmem_cache_free(inmem_entry_slab, cur); + dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); } if (submit_bio) f2fs_submit_merged_bio(sbi, DATA, WRITE); @@ -248,7 +289,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) { /* check the # of cached NAT entries and prefree segments */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || - excess_prefree_segs(sbi)) + excess_prefree_segs(sbi) || + available_free_memory(sbi, INO_ENTRIES)) f2fs_sync_fs(sbi->sb, true); } @@ -441,10 +483,33 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) } } -static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +static void __add_discard_entry(struct f2fs_sb_info *sbi, + struct cp_control *cpc, unsigned int start, unsigned int end) { struct list_head *head = &SM_I(sbi)->discard_list; - struct discard_entry *new; + struct discard_entry *new, *last; + + if (!list_empty(head)) { + last = list_last_entry(head, struct discard_entry, list); + if (START_BLOCK(sbi, cpc->trim_start) + start == + last->blkaddr + last->len) { + last->len += end - start; + goto done; + } + } + + new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); + INIT_LIST_HEAD(&new->list); + new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; + new->len = end - start; + list_add_tail(&new->list, head); +done: + SM_I(sbi)->nr_discards += end - start; + cpc->trimmed += end - start; +} + +static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +{ int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); int max_blocks = sbi->blocks_per_seg; struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); @@ -473,13 +538,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) } mutex_unlock(&dirty_i->seglist_lock); - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start); - new->len = sbi->blocks_per_seg; - list_add_tail(&new->list, head); - SM_I(sbi)->nr_discards += sbi->blocks_per_seg; - cpc->trimmed += sbi->blocks_per_seg; + __add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg); return; } @@ -489,7 +548,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ for (i = 0; i < entries; i++) - dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; + dmap[i] = ~(cur_map[i] | ckpt_map[i]); while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { start = __find_rev_next_bit(dmap, max_blocks, end + 1); @@ -501,14 +560,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (end - start < cpc->trim_minlen) continue; - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; - new->len = end - start; - cpc->trimmed += end - start; - - list_add_tail(&new->list, head); - SM_I(sbi)->nr_discards += end - start; + __add_discard_entry(sbi, cpc, start, end); } } @@ -620,10 +672,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - if (f2fs_set_bit(offset, se->cur_valid_map)) + if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); } else { - if (!f2fs_clear_bit(offset, se->cur_valid_map)) + if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) @@ -1004,6 +1056,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) range->len < sbi->blocksize) return -EINVAL; + cpc.trimmed = 0; if (end <= MAIN_BLKADDR(sbi)) goto out; @@ -1015,10 +1068,11 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.trim_start = start_segno; cpc.trim_end = end_segno; cpc.trim_minlen = range->minlen >> sbi->log_blocksize; - cpc.trimmed = 0; /* do checkpoint to issue discard commands safely */ + mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); out: range->len = cpc.trimmed << sbi->log_blocksize; return 0; @@ -1050,8 +1104,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type) else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(page) && !is_cold_node(page)) - return CURSEG_HOT_NODE; + if (IS_DNODE(page) && is_cold_node(page)) + return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; } @@ -1524,17 +1578,7 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, unsigned int segno) { - struct sit_info *sit_i = SIT_I(sbi); - unsigned int offset = SIT_BLOCK_OFFSET(segno); - block_t blk_addr = sit_i->sit_base_addr + offset; - - check_seg_range(sbi, segno); - - /* calculate sit block address */ - if (f2fs_test_bit(offset, sit_i->sit_bitmap)) - blk_addr += sit_i->sit_blocks; - - return get_meta_page(sbi, blk_addr); + return get_meta_page(sbi, current_sit_addr(sbi, segno)); } static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, @@ -1687,7 +1731,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * #2, flush sit entries to sit page. */ list_for_each_entry_safe(ses, tmp, head, set_list) { - struct page *page; + struct page *page = NULL; struct f2fs_sit_block *raw_sit = NULL; unsigned int start_segno = ses->start_segno; unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, @@ -2200,7 +2244,7 @@ int __init create_segment_manager_caches(void) goto fail; sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", - sizeof(struct nat_entry_set)); + sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) goto destory_discard_entry; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 2495bec1c62..7f327c0ba4e 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -657,10 +657,7 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) { unsigned int block_off = SIT_BLOCK_OFFSET(start); - if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) - f2fs_clear_bit(block_off, sit_i->sit_bitmap); - else - f2fs_set_bit(block_off, sit_i->sit_bitmap); + f2fs_change_bit(block_off, sit_i->sit_bitmap); } static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) @@ -714,6 +711,9 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) */ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) { + if (sbi->sb->s_bdi->dirty_exceeded) + return 0; + if (type == DATA) return sbi->blocks_per_seg; else if (type == NODE) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41d6f700f4e..f71421d7047 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -51,8 +51,10 @@ enum { Opt_disable_ext_identify, Opt_inline_xattr, Opt_inline_data, + Opt_inline_dentry, Opt_flush_merge, Opt_nobarrier, + Opt_fastboot, Opt_err, }; @@ -69,8 +71,10 @@ static match_table_t f2fs_tokens = { {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, {Opt_inline_data, "inline_data"}, + {Opt_inline_dentry, "inline_dentry"}, {Opt_flush_merge, "flush_merge"}, {Opt_nobarrier, "nobarrier"}, + {Opt_fastboot, "fastboot"}, {Opt_err, NULL}, }; @@ -340,12 +344,18 @@ static int parse_options(struct super_block *sb, char *options) case Opt_inline_data: set_opt(sbi, INLINE_DATA); break; + case Opt_inline_dentry: + set_opt(sbi, INLINE_DENTRY); + break; case Opt_flush_merge: set_opt(sbi, FLUSH_MERGE); break; case Opt_nobarrier: set_opt(sbi, NOBARRIER); break; + case Opt_fastboot: + set_opt(sbi, FASTBOOT); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -373,6 +383,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) fi->i_advise = 0; rwlock_init(&fi->ext.ext_lock); init_rwsem(&fi->i_sem); + INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); @@ -473,9 +484,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) trace_f2fs_sync_fs(sb, sync); if (sync) { - struct cp_control cpc = { - .reason = CP_SYNC, - }; + struct cp_control cpc; + + cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC; mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); @@ -562,10 +573,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_ext_identify"); if (test_opt(sbi, INLINE_DATA)) seq_puts(seq, ",inline_data"); + if (test_opt(sbi, INLINE_DENTRY)) + seq_puts(seq, ",inline_dentry"); if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) seq_puts(seq, ",flush_merge"); if (test_opt(sbi, NOBARRIER)) seq_puts(seq, ",nobarrier"); + if (test_opt(sbi, FASTBOOT)) + seq_puts(seq, ",fastboot"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; @@ -654,7 +669,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) f2fs_sync_fs(sb, 1); need_restart_gc = true; } - } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { + } else if (!sbi->gc_thread) { err = start_gc_thread(sbi); if (err) goto restore_opts; @@ -667,7 +682,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { destroy_flush_cmd_control(sbi); - } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) { + } else if (!SM_I(sbi)->cmd_control_info) { err = create_flush_cmd_control(sbi); if (err) goto restore_gc; @@ -922,7 +937,7 @@ retry: static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; - struct f2fs_super_block *raw_super; + struct f2fs_super_block *raw_super = NULL; struct buffer_head *raw_super_buf; struct inode *root; long err = -EINVAL; @@ -1123,7 +1138,7 @@ try_onemore: * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (!f2fs_readonly(sb)) { + if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = start_gc_thread(sbi); if (err) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index deca8728117..5072bf9ae0e 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -83,7 +83,7 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, } if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); + return f2fs_getxattr(dentry->d_inode, type, name, buffer, size, NULL); } static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, @@ -398,7 +398,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } int f2fs_getxattr(struct inode *inode, int index, const char *name, - void *buffer, size_t buffer_size) + void *buffer, size_t buffer_size, struct page *ipage) { struct f2fs_xattr_entry *entry; void *base_addr; @@ -412,7 +412,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - base_addr = read_all_xattrs(inode, NULL); + base_addr = read_all_xattrs(inode, ipage); if (!base_addr) return -ENOMEM; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 34ab7dbcf5e..969d792ca36 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -115,7 +115,8 @@ extern const struct xattr_handler *f2fs_xattr_handlers[]; extern int f2fs_setxattr(struct inode *, int, const char *, const void *, size_t, struct page *, int); -extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); +extern int f2fs_getxattr(struct inode *, int, const char *, void *, + size_t, struct page *); extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); #else @@ -126,7 +127,8 @@ static inline int f2fs_setxattr(struct inode *inode, int index, return -EOPNOTSUPP; } static inline int f2fs_getxattr(struct inode *inode, int index, - const char *name, void *buffer, size_t buffer_size) + const char *name, void *buffer, + size_t buffer_size, struct page *dpage) { return -EOPNOTSUPP; } diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 3963ede84eb..c5d6bb939d1 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -702,10 +702,11 @@ static int fat_readdir(struct file *file, struct dir_context *ctx) } #define FAT_IOCTL_FILLDIR_FUNC(func, dirent_type) \ -static int func(void *__buf, const char *name, int name_len, \ +static int func(struct dir_context *ctx, const char *name, int name_len, \ loff_t offset, u64 ino, unsigned int d_type) \ { \ - struct fat_ioctl_filldir_callback *buf = __buf; \ + struct fat_ioctl_filldir_callback *buf = \ + container_of(ctx, struct fat_ioctl_filldir_callback, ctx); \ struct dirent_type __user *d1 = buf->dirent; \ struct dirent_type __user *d2 = d1 + 1; \ \ diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6df8d3d885e..b8b92c2f968 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -736,7 +736,12 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, } alias = d_find_alias(inode); - if (alias && !vfat_d_anon_disconn(alias)) { + /* + * Checking "alias->d_parent == dentry->d_parent" to make sure + * FS is not corrupted (especially double linked dir). + */ + if (alias && alias->d_parent == dentry->d_parent && + !vfat_d_anon_disconn(alias)) { /* * This inode has non anonymous-DCACHE_DISCONNECTED * dentry. This means, the user did ->lookup() by an @@ -755,12 +760,9 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, out: mutex_unlock(&MSDOS_SB(sb)->s_lock); - dentry->d_time = dentry->d_parent->d_inode->i_version; - dentry = d_splice_alias(inode, dentry); - if (dentry) - dentry->d_time = dentry->d_parent->d_inode->i_version; - return dentry; - + if (!inode) + dentry->d_time = dir->i_version; + return d_splice_alias(inode, dentry); error: mutex_unlock(&MSDOS_SB(sb)->s_lock); return ERR_PTR(err); @@ -793,7 +795,6 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mtime = inode->i_atime = inode->i_ctime = ts; /* timestamp is already written, so mark_inode_dirty() is unneeded. */ - dentry->d_time = dentry->d_parent->d_inode->i_version; d_instantiate(dentry, inode); out: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -824,6 +825,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; fat_detach(inode); + dentry->d_time = dir->i_version; out: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -849,6 +851,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; fat_detach(inode); + dentry->d_time = dir->i_version; out: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -889,7 +892,6 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mtime = inode->i_atime = inode->i_ctime = ts; /* timestamp is already written, so mark_inode_dirty() is unneeded. */ - dentry->d_time = dentry->d_parent->d_inode->i_version; d_instantiate(dentry, inode); mutex_unlock(&MSDOS_SB(sb)->s_lock); diff --git a/fs/file.c b/fs/file.c index ab3eb6a8823..ee738ea028f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -869,7 +869,7 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes) struct file *file = fget_raw(fildes); if (file) { - ret = get_unused_fd(); + ret = get_unused_fd_flags(0); if (ret >= 0) fd_install(ret, file); else diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index dbab798f5ca..df562cc8776 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -372,7 +372,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, if (inode && get_node_id(inode) == FUSE_ROOT_ID) goto out_iput; - newent = d_materialise_unique(entry, inode); + newent = d_splice_alias(inode, entry); err = PTR_ERR(newent); if (IS_ERR(newent)) goto out_err; @@ -1320,7 +1320,7 @@ static int fuse_direntplus_link(struct file *file, if (!inode) goto out; - alias = d_materialise_unique(dentry, inode); + alias = d_splice_alias(inode, dentry); err = PTR_ERR(alias); if (IS_ERR(alias)) goto out; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index caa8d95b24e..bf50259012a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1988,7 +1988,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { pgoff_t index = pos >> PAGE_CACHE_SHIFT; - struct fuse_conn *fc = get_fuse_conn(file->f_dentry->d_inode); + struct fuse_conn *fc = get_fuse_conn(file_inode(file)); struct page *page; loff_t fsize; int err = -ENOMEM; diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 5d4261ff5d2..c5a34f09e22 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -365,23 +365,17 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) ret = gfs2_dir_read_data(ip, hc, hsize); if (ret < 0) { - if (is_vmalloc_addr(hc)) - vfree(hc); - else - kfree(hc); + kvfree(hc); return ERR_PTR(ret); } spin_lock(&inode->i_lock); - if (ip->i_hash_cache) { - if (is_vmalloc_addr(hc)) - vfree(hc); - else - kfree(hc); - } else { + if (likely(!ip->i_hash_cache)) { ip->i_hash_cache = hc; + hc = NULL; } spin_unlock(&inode->i_lock); + kvfree(hc); return ip->i_hash_cache; } @@ -396,10 +390,7 @@ void gfs2_dir_hash_inval(struct gfs2_inode *ip) { __be64 *hc = ip->i_hash_cache; ip->i_hash_cache = NULL; - if (is_vmalloc_addr(hc)) - vfree(hc); - else - kfree(hc); + kvfree(hc); } static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) @@ -1168,10 +1159,7 @@ fail: gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); out_kfree: - if (is_vmalloc_addr(hc2)) - vfree(hc2); - else - kfree(hc2); + kvfree(hc2); return error; } @@ -1302,14 +1290,6 @@ static void *gfs2_alloc_sort_buffer(unsigned size) return ptr; } -static void gfs2_free_sort_buffer(void *ptr) -{ - if (is_vmalloc_addr(ptr)) - vfree(ptr); - else - kfree(ptr); -} - static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, int *copied, unsigned *depth, u64 leaf_no) @@ -1393,7 +1373,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, out_free: for(i = 0; i < leaf; i++) brelse(larr[i]); - gfs2_free_sort_buffer(larr); + kvfree(larr); out: return error; } @@ -2004,10 +1984,7 @@ out_rlist: gfs2_rlist_free(&rlist); gfs2_quota_unhold(dip); out: - if (is_vmalloc_addr(ht)) - vfree(ht); - else - kfree(ht); + kvfree(ht); return error; } diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 8b9b3775e2e..c41d255b6a7 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -69,10 +69,12 @@ struct get_name_filldir { char *name; }; -static int get_name_filldir(void *opaque, const char *name, int length, - loff_t offset, u64 inum, unsigned int type) +static int get_name_filldir(struct dir_context *ctx, const char *name, + int length, loff_t offset, u64 inum, + unsigned int type) { - struct get_name_filldir *gnfd = opaque; + struct get_name_filldir *gnfd = + container_of(ctx, struct get_name_filldir, ctx); if (inum != gnfd->inum.no_addr) return 0; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 80dd44dca02..6e600abf694 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -337,7 +337,8 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift; int hint = min_t(size_t, INT_MAX, blks); - atomic_set(&ip->i_res->rs_sizehint, hint); + if (hint > atomic_read(&ip->i_res->rs_sizehint)) + atomic_set(&ip->i_res->rs_sizehint, hint); } /** @@ -728,7 +729,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *dibh; int error; - loff_t size = len; unsigned int nr_blks; sector_t lblock = offset >> inode->i_blkbits; @@ -762,11 +762,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, goto out; } } - if (offset + size > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE)) - i_size_write(inode, offset + size); - - mark_inode_dirty(inode); - out: brelse(dibh); return error; @@ -796,8 +791,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, } } -static long gfs2_fallocate(struct file *file, int mode, loff_t offset, - loff_t len) +static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -811,14 +805,9 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; loff_t max_chunk_size = UINT_MAX & bsize_mask; - struct gfs2_holder gh; next = (next + 1) << sdp->sd_sb.sb_bsize_shift; - /* We only support the FALLOC_FL_KEEP_SIZE mode */ - if (mode & ~FALLOC_FL_KEEP_SIZE) - return -EOPNOTSUPP; - offset &= bsize_mask; len = next - offset; @@ -829,17 +818,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (bytes == 0) bytes = sdp->sd_sb.sb_bsize; - error = gfs2_rs_alloc(ip); - if (error) - return error; - - mutex_lock(&inode->i_mutex); - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - error = gfs2_glock_nq(&gh); - if (unlikely(error)) - goto out_uninit; - gfs2_size_hint(file, offset, len); while (len > 0) { @@ -852,8 +830,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, } error = gfs2_quota_lock_check(ip); if (error) - goto out_unlock; - + return error; retry: gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); @@ -895,20 +872,64 @@ retry: gfs2_quota_unlock(ip); } - if (error == 0) - error = generic_write_sync(file, pos, count); - goto out_unlock; + if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) { + i_size_write(inode, pos + count); + /* Marks the inode as dirty */ + file_update_time(file); + } + + return generic_write_sync(file, pos, count); out_trans_fail: gfs2_inplace_release(ip); out_qunlock: gfs2_quota_unlock(ip); + return error; +} + +static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) +{ + struct inode *inode = file_inode(file); + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; + int ret; + + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + + mutex_lock(&inode->i_mutex); + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (ret) + goto out_uninit; + + if (!(mode & FALLOC_FL_KEEP_SIZE) && + (offset + len) > inode->i_size) { + ret = inode_newsize_ok(inode, offset + len); + if (ret) + goto out_unlock; + } + + ret = get_write_access(inode); + if (ret) + goto out_unlock; + + ret = gfs2_rs_alloc(ip); + if (ret) + goto out_putw; + + ret = __gfs2_fallocate(file, mode, offset, len); + if (ret) + gfs2_rs_deltree(ip->i_res); +out_putw: + put_write_access(inode); out_unlock: gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); mutex_unlock(&inode->i_mutex); - return error; + return ret; } #ifdef CONFIG_GFS2_FS_LOCKING_DLM diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8f0c19d1d94..a23524aa3ea 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -836,8 +836,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder * gh->gh_flags = flags; gh->gh_iflags = 0; gh->gh_ip = _RET_IP_; - if (gh->gh_owner_pid) - put_pid(gh->gh_owner_pid); + put_pid(gh->gh_owner_pid); gh->gh_owner_pid = get_pid(task_pid(current)); } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 1cc0bba6313..fe91951c336 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -28,6 +28,8 @@ #include "trans.h" #include "dir.h" +struct workqueue_struct *gfs2_freeze_wq; + static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) { fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n", @@ -94,11 +96,8 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) * on the stack */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); tr.tr_ip = _RET_IP_; - sb_start_intwrite(sdp->sd_vfs); - if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) { - sb_end_intwrite(sdp->sd_vfs); + if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) return; - } WARN_ON_ONCE(current->journal_info); current->journal_info = &tr; @@ -469,20 +468,19 @@ static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) static void freeze_go_sync(struct gfs2_glock *gl) { + int error = 0; struct gfs2_sbd *sdp = gl->gl_sbd; - DEFINE_WAIT(wait); if (gl->gl_state == LM_ST_SHARED && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { - atomic_set(&sdp->sd_log_freeze, 1); - wake_up(&sdp->sd_logd_waitq); - do { - prepare_to_wait(&sdp->sd_log_frozen_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (atomic_read(&sdp->sd_log_freeze)) - io_schedule(); - } while(atomic_read(&sdp->sd_log_freeze)); - finish_wait(&sdp->sd_log_frozen_wait, &wait); + atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); + error = freeze_super(sdp->sd_vfs); + if (error) { + printk(KERN_INFO "GFS2: couldn't freeze filesystem: %d\n", error); + gfs2_assert_withdraw(sdp, 0); + } + queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work); + gfs2_log_flush(sdp, NULL, FREEZE_FLUSH); } } diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h index 7455d2629bc..8ed1857c1a8 100644 --- a/fs/gfs2/glops.h +++ b/fs/gfs2/glops.h @@ -12,6 +12,8 @@ #include "incore.h" +extern struct workqueue_struct *gfs2_freeze_wq; + extern const struct gfs2_glock_operations gfs2_meta_glops; extern const struct gfs2_glock_operations gfs2_inode_glops; extern const struct gfs2_glock_operations gfs2_rgrp_glops; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 39e7e9959b7..7a2dbbc0d63 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -97,6 +97,7 @@ struct gfs2_rgrpd { #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ +#define GFS2_RDF_PREFERRED 0x80000000 /* This rgrp is preferred */ #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ spinlock_t rd_rsspin; /* protects reservation related vars */ struct rb_root rd_rstree; /* multi-block reservation tree */ @@ -587,6 +588,12 @@ enum { SDF_SKIP_DLM_UNLOCK = 8, }; +enum gfs2_freeze_state { + SFS_UNFROZEN = 0, + SFS_STARTING_FREEZE = 1, + SFS_FROZEN = 2, +}; + #define GFS2_FSNAME_LEN 256 struct gfs2_inum_host { @@ -684,6 +691,7 @@ struct gfs2_sbd { struct gfs2_holder sd_live_gh; struct gfs2_glock *sd_rename_gl; struct gfs2_glock *sd_freeze_gl; + struct work_struct sd_freeze_work; wait_queue_head_t sd_glock_wait; atomic_t sd_glock_disposal; struct completion sd_locking_init; @@ -788,6 +796,9 @@ struct gfs2_sbd { wait_queue_head_t sd_log_flush_wait; int sd_log_error; + atomic_t sd_reserving_log; + wait_queue_head_t sd_reserving_log_wait; + unsigned int sd_log_flush_head; u64 sd_log_flush_wrapped; @@ -797,12 +808,8 @@ struct gfs2_sbd { /* For quiescing the filesystem */ struct gfs2_holder sd_freeze_gh; - struct gfs2_holder sd_freeze_root_gh; - struct gfs2_holder sd_thaw_gh; - atomic_t sd_log_freeze; - atomic_t sd_frozen_root; - wait_queue_head_t sd_frozen_root_wait; - wait_queue_head_t sd_log_frozen_wait; + atomic_t sd_freeze_state; + struct mutex sd_freeze_mutex; char sd_fsname[GFS2_FSNAME_LEN]; char sd_table_name[GFS2_FSNAME_LEN]; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c4ed823d150..9054002ebe7 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -596,7 +596,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_glock *io_gl; - struct dentry *d; int error, free_vfs_inode = 0; u32 aflags = 0; unsigned blocks = 1; @@ -624,22 +623,18 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, inode = gfs2_dir_search(dir, &dentry->d_name, !S_ISREG(mode) || excl); error = PTR_ERR(inode); if (!IS_ERR(inode)) { - d = d_splice_alias(inode, dentry); - error = PTR_ERR(d); - if (IS_ERR(d)) { - inode = ERR_CAST(d); + if (S_ISDIR(inode->i_mode)) { + iput(inode); + inode = ERR_PTR(-EISDIR); goto fail_gunlock; } + d_instantiate(dentry, inode); error = 0; if (file) { - if (S_ISREG(inode->i_mode)) { - WARN_ON(d != NULL); + if (S_ISREG(inode->i_mode)) error = finish_open(file, dentry, gfs2_open_common, opened); - } else { - error = finish_no_open(file, d); - } - } else { - dput(d); + else + error = finish_no_open(file, NULL); } gfs2_glock_dq_uninit(ghs); return error; @@ -1045,11 +1040,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, if (error) return error; - error = gfs2_dir_check(&dip->i_inode, name, ip); - if (error) - return error; - - return 0; + return gfs2_dir_check(&dip->i_inode, name, ip); } /** @@ -1254,11 +1245,8 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, if (d != NULL) dentry = d; if (dentry->d_inode) { - if (!(*opened & FILE_OPENED)) { - if (d == NULL) - dget(dentry); - return finish_no_open(file, dentry); - } + if (!(*opened & FILE_OPENED)) + return finish_no_open(file, d); dput(d); return 0; } @@ -1622,26 +1610,18 @@ int gfs2_permission(struct inode *inode, int mask) { struct gfs2_inode *ip; struct gfs2_holder i_gh; - struct gfs2_sbd *sdp = GFS2_SB(inode); int error; int unlock = 0; - int frozen_root = 0; ip = GFS2_I(inode); if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) && - inode == sdp->sd_root_dir->d_inode && - atomic_inc_not_zero(&sdp->sd_frozen_root))) - frozen_root = 1; - else { - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); - if (error) - return error; - unlock = 1; - } + if (mask & MAY_NOT_BLOCK) + return -ECHILD; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return error; + unlock = 1; } if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) @@ -1650,8 +1630,6 @@ int gfs2_permission(struct inode *inode, int mask) error = generic_permission(inode, mask); if (unlock) gfs2_glock_dq_uninit(&i_gh); - else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root)) - wake_up(&sdp->sd_frozen_root_wait); return error; } @@ -1824,29 +1802,19 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct inode *inode = dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; - struct gfs2_sbd *sdp = GFS2_SB(inode); int error; int unlock = 0; - int frozen_root = 0; if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) && - inode == sdp->sd_root_dir->d_inode && - atomic_inc_not_zero(&sdp->sd_frozen_root))) - frozen_root = 1; - else { - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); - if (error) - return error; - unlock = 1; - } + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + if (error) + return error; + unlock = 1; } generic_fillattr(inode, stat); if (unlock) gfs2_glock_dq_uninit(&gh); - else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root)) - wake_up(&sdp->sd_frozen_root_wait); return 0; } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 3966fadbceb..536e7a6252c 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -339,6 +339,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) { + int ret = 0; unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize); unsigned wanted = blks + reserved_blks; DEFINE_WAIT(wait); @@ -362,9 +363,13 @@ retry: } while(free_blocks <= wanted); finish_wait(&sdp->sd_log_waitq, &wait); } + atomic_inc(&sdp->sd_reserving_log); if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks, - free_blocks - blks) != free_blocks) + free_blocks - blks) != free_blocks) { + if (atomic_dec_and_test(&sdp->sd_reserving_log)) + wake_up(&sdp->sd_reserving_log_wait); goto retry; + } trace_gfs2_log_blocks(sdp, -blks); /* @@ -377,9 +382,11 @@ retry: down_read(&sdp->sd_log_flush_lock); if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { gfs2_log_release(sdp, blks); - return -EROFS; + ret = -EROFS; } - return 0; + if (atomic_dec_and_test(&sdp->sd_reserving_log)) + wake_up(&sdp->sd_reserving_log_wait); + return ret; } /** @@ -652,9 +659,12 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) u32 hash; int rw = WRITE_FLUSH_FUA | REQ_META; struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO); + enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); lh = page_address(page); clear_page(lh); + gfs2_assert_withdraw(sdp, (state != SFS_FROZEN)); + tail = current_tail(sdp); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); @@ -695,6 +705,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, enum gfs2_flush_type type) { struct gfs2_trans *tr; + enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); down_write(&sdp->sd_log_flush_lock); @@ -713,8 +724,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, INIT_LIST_HEAD(&tr->tr_ail1_list); INIT_LIST_HEAD(&tr->tr_ail2_list); tr->tr_first = sdp->sd_log_flush_head; + if (unlikely (state == SFS_FROZEN)) + gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new); } + if (unlikely(state == SFS_FROZEN)) + gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); @@ -745,8 +760,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); - if (atomic_read(&sdp->sd_log_freeze)) - type = FREEZE_FLUSH; if (type != NORMAL_FLUSH) { if (!sdp->sd_log_idle) { for (;;) { @@ -763,21 +776,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, } if (type == SHUTDOWN_FLUSH || type == FREEZE_FLUSH) gfs2_log_shutdown(sdp); - if (type == FREEZE_FLUSH) { - int error; - - atomic_set(&sdp->sd_log_freeze, 0); - wake_up(&sdp->sd_log_frozen_wait); - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, - LM_ST_SHARED, 0, - &sdp->sd_thaw_gh); - if (error) { - printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error); - gfs2_assert_withdraw(sdp, 0); - } - else - gfs2_glock_dq_uninit(&sdp->sd_thaw_gh); - } + if (type == FREEZE_FLUSH) + atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); } trace_gfs2_log_flush(sdp, 0); @@ -888,7 +888,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) { - return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1) || atomic_read(&sdp->sd_log_freeze)); + return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1)); } static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp) diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 82b6ac82965..241a399bf83 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -30,6 +30,7 @@ #include "quota.h" #include "recovery.h" #include "dir.h" +#include "glops.h" struct workqueue_struct *gfs2_control_wq; @@ -161,9 +162,14 @@ static int __init init_gfs2_fs(void) if (!gfs2_control_wq) goto fail_recovery; + gfs2_freeze_wq = alloc_workqueue("freeze_workqueue", 0, 0); + + if (!gfs2_freeze_wq) + goto fail_control; + gfs2_page_pool = mempool_create_page_pool(64, 0); if (!gfs2_page_pool) - goto fail_control; + goto fail_freeze; gfs2_register_debugfs(); @@ -171,6 +177,8 @@ static int __init init_gfs2_fs(void) return 0; +fail_freeze: + destroy_workqueue(gfs2_freeze_wq); fail_control: destroy_workqueue(gfs2_control_wq); fail_recovery: @@ -224,6 +232,7 @@ static void __exit exit_gfs2_fs(void) unregister_filesystem(&gfs2meta_fs_type); destroy_workqueue(gfs_recovery_wq); destroy_workqueue(gfs2_control_wq); + destroy_workqueue(gfs2_freeze_wq); list_lru_destroy(&gfs2_qd_lru); rcu_barrier(); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index d3eae244076..8633ad328ee 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -129,11 +129,11 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_rwsem(&sdp->sd_log_flush_lock); atomic_set(&sdp->sd_log_in_flight, 0); + atomic_set(&sdp->sd_reserving_log, 0); + init_waitqueue_head(&sdp->sd_reserving_log_wait); init_waitqueue_head(&sdp->sd_log_flush_wait); - init_waitqueue_head(&sdp->sd_log_frozen_wait); - atomic_set(&sdp->sd_log_freeze, 0); - atomic_set(&sdp->sd_frozen_root, 0); - init_waitqueue_head(&sdp->sd_frozen_root_wait); + atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); + mutex_init(&sdp->sd_freeze_mutex); return sdp; } @@ -760,15 +760,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags); gfs2_glock_dq_uninit(&ji_gh); jindex = 0; - if (!sdp->sd_args.ar_spectator) { - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, - &sdp->sd_thaw_gh); - if (error) { - fs_err(sdp, "can't acquire freeze glock: %d\n", error); - goto fail_jinode_gh; - } - } - gfs2_glock_dq_uninit(&sdp->sd_thaw_gh); + INIT_WORK(&sdp->sd_freeze_work, gfs2_freeze_func); return 0; fail_jinode_gh: @@ -1082,6 +1074,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent sb->s_export_op = &gfs2_export_ops; sb->s_xattr = gfs2_xattr_handlers; sb->s_qcop = &gfs2_quotactl_ops; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; sb->s_time_gran = 1; sb->s_maxbytes = MAX_LFS_FILESIZE; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 64b29f7f6b4..c8b148bbdc8 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1360,13 +1360,8 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count)); - if (sdp->sd_quota_bitmap) { - if (is_vmalloc_addr(sdp->sd_quota_bitmap)) - vfree(sdp->sd_quota_bitmap); - else - kfree(sdp->sd_quota_bitmap); - sdp->sd_quota_bitmap = NULL; - } + kvfree(sdp->sd_quota_bitmap); + sdp->sd_quota_bitmap = NULL; } static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 7474c413ffd..9150207f365 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -936,7 +936,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize; rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1; rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; - rgd->rd_flags &= ~GFS2_RDF_UPTODATE; + rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); if (rgd->rd_data > sdp->sd_max_rg_data) sdp->sd_max_rg_data = rgd->rd_data; spin_lock(&sdp->sd_rindex_spin); @@ -955,6 +955,36 @@ fail: } /** + * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use + * @sdp: the GFS2 superblock + * + * The purpose of this function is to select a subset of the resource groups + * and mark them as PREFERRED. We do it in such a way that each node prefers + * to use a unique set of rgrps to minimize glock contention. + */ +static void set_rgrp_preferences(struct gfs2_sbd *sdp) +{ + struct gfs2_rgrpd *rgd, *first; + int i; + + /* Skip an initial number of rgrps, based on this node's journal ID. + That should start each node out on its own set. */ + rgd = gfs2_rgrpd_get_first(sdp); + for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++) + rgd = gfs2_rgrpd_get_next(rgd); + first = rgd; + + do { + rgd->rd_flags |= GFS2_RDF_PREFERRED; + for (i = 0; i < sdp->sd_journals; i++) { + rgd = gfs2_rgrpd_get_next(rgd); + if (rgd == first) + break; + } + } while (rgd != first); +} + +/** * gfs2_ri_update - Pull in a new resource index from the disk * @ip: pointer to the rindex inode * @@ -973,6 +1003,8 @@ static int gfs2_ri_update(struct gfs2_inode *ip) if (error < 0) return error; + set_rgrp_preferences(sdp); + sdp->sd_rindex_uptodate = 1; return 0; } @@ -1891,6 +1923,25 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b } /** + * fast_to_acquire - determine if a resource group will be fast to acquire + * + * If this is one of our preferred rgrps, it should be quicker to acquire, + * because we tried to set ourselves up as dlm lock master. + */ +static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) +{ + struct gfs2_glock *gl = rgd->rd_gl; + + if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) && + !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && + !test_bit(GLF_DEMOTE, &gl->gl_flags)) + return 1; + if (rgd->rd_flags & GFS2_RDF_PREFERRED) + return 1; + return 0; +} + +/** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for * @ap: the allocation parameters @@ -1932,10 +1983,15 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a rg_locked = 0; if (skip && skip--) goto next_rgrp; - if (!gfs2_rs_active(rs) && (loops < 2) && - gfs2_rgrp_used_recently(rs, 1000) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) - goto next_rgrp; + if (!gfs2_rs_active(rs)) { + if (loops == 0 && + !fast_to_acquire(rs->rs_rbm.rgd)) + goto next_rgrp; + if ((loops < 2) && + gfs2_rgrp_used_recently(rs, 1000) && + gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + goto next_rgrp; + } error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); @@ -2195,6 +2251,9 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, trace_gfs2_rs(rs, TRACE_RS_CLAIM); if (rs->rs_free && !ret) goto out; + /* We used up our block reservation, so we should + reserve more blocks next time. */ + atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint); } __rs_deltree(rs); } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 5d8f085f7ad..b104f4af3af 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -20,6 +20,7 @@ */ #define RGRP_RSRV_MINBYTES 8 #define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY)) +#define RGRP_RSRV_ADDBLKS 64 struct gfs2_rgrpd; struct gfs2_sbd; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index a346f56c4c6..5b327f837de 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -26,6 +26,7 @@ #include <linux/wait.h> #include <linux/writeback.h> #include <linux/backing-dev.h> +#include <linux/kernel.h> #include "gfs2.h" #include "incore.h" @@ -399,7 +400,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; - struct gfs2_holder thaw_gh; + struct gfs2_holder freeze_gh; struct gfs2_log_header_host head; int error; @@ -408,7 +409,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) return error; error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, - &thaw_gh); + &freeze_gh); if (error) goto fail_threads; @@ -434,13 +435,13 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - gfs2_glock_dq_uninit(&thaw_gh); + gfs2_glock_dq_uninit(&freeze_gh); return 0; fail: - thaw_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_uninit(&thaw_gh); + freeze_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_uninit(&freeze_gh); fail_threads: kthread_stop(sdp->sd_quotad_process); kthread_stop(sdp->sd_logd_process); @@ -580,14 +581,15 @@ int gfs2_statfs_sync(struct super_block *sb, int type) struct buffer_head *m_bh, *l_bh; int error; + sb_start_write(sb); error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &gh); if (error) - return error; + goto out; error = gfs2_meta_inode_buffer(m_ip, &m_bh); if (error) - goto out; + goto out_unlock; spin_lock(&sdp->sd_statfs_spin); gfs2_statfs_change_in(m_sc, m_bh->b_data + @@ -615,8 +617,10 @@ out_bh2: brelse(l_bh); out_bh: brelse(m_bh); -out: +out_unlock: gfs2_glock_dq_uninit(&gh); +out: + sb_end_write(sb); return error; } @@ -643,14 +647,8 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct lfcc *lfcc; LIST_HEAD(list); struct gfs2_log_header_host lh; - struct gfs2_inode *dip = GFS2_I(sdp->sd_root_dir->d_inode); int error; - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, - &sdp->sd_freeze_root_gh); - if (error) - return error; - atomic_set(&sdp->sd_frozen_root, 1); list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL); if (!lfcc) { @@ -692,11 +690,6 @@ out: gfs2_glock_dq_uninit(&lfcc->gh); kfree(lfcc); } - if (error) { - atomic_dec(&sdp->sd_frozen_root); - wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0); - gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh); - } return error; } @@ -834,18 +827,14 @@ out: static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) { - struct gfs2_holder thaw_gh; + struct gfs2_holder freeze_gh; int error; error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE, - &thaw_gh); + &freeze_gh); if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) return error; - down_write(&sdp->sd_log_flush_lock); - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - up_write(&sdp->sd_log_flush_lock); - kthread_stop(sdp->sd_quotad_process); kthread_stop(sdp->sd_logd_process); @@ -853,11 +842,16 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) gfs2_quota_sync(sdp->sd_vfs, 0); gfs2_statfs_sync(sdp->sd_vfs, 0); + down_write(&sdp->sd_log_flush_lock); + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + up_write(&sdp->sd_log_flush_lock); + gfs2_log_flush(sdp, NULL, SHUTDOWN_FLUSH); + wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0); gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); - if (thaw_gh.gh_gl) - gfs2_glock_dq_uninit(&thaw_gh); + if (freeze_gh.gh_gl) + gfs2_glock_dq_uninit(&freeze_gh); gfs2_quota_cleanup(sdp); @@ -943,11 +937,41 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) struct gfs2_sbd *sdp = sb->s_fs_info; gfs2_quota_sync(sb, -1); - if (wait && sdp && !atomic_read(&sdp->sd_log_freeze)) + if (wait && sdp) gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); return 0; } +void gfs2_freeze_func(struct work_struct *work) +{ + int error; + struct gfs2_holder freeze_gh; + struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work); + struct super_block *sb = sdp->sd_vfs; + + atomic_inc(&sb->s_active); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, + &freeze_gh); + if (error) { + printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error); + gfs2_assert_withdraw(sdp, 0); + } + else { + atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); + error = thaw_super(sb); + if (error) { + printk(KERN_INFO "GFS2: couldn't thaw filesystem: %d\n", + error); + gfs2_assert_withdraw(sdp, 0); + } + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + freeze_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_uninit(&freeze_gh); + } + deactivate_super(sb); + return; +} + /** * gfs2_freeze - prevent further writes to the filesystem * @sb: the VFS structure for the filesystem @@ -957,10 +981,16 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) static int gfs2_freeze(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; - int error; + int error = 0; - if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) - return -EINVAL; + mutex_lock(&sdp->sd_freeze_mutex); + if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) + goto out; + + if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { + error = -EINVAL; + goto out; + } for (;;) { error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh); @@ -980,7 +1010,10 @@ static int gfs2_freeze(struct super_block *sb) fs_err(sdp, "retrying...\n"); msleep(1000); } - return 0; + error = 0; +out: + mutex_unlock(&sdp->sd_freeze_mutex); + return error; } /** @@ -993,10 +1026,15 @@ static int gfs2_unfreeze(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; + mutex_lock(&sdp->sd_freeze_mutex); + if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN || + sdp->sd_freeze_gh.gh_gl == NULL) { + mutex_unlock(&sdp->sd_freeze_mutex); + return 0; + } + gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); - atomic_dec(&sdp->sd_frozen_root); - wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0); - gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh); + mutex_unlock(&sdp->sd_freeze_mutex); return 0; } @@ -1618,8 +1656,8 @@ const struct super_operations gfs2_super_ops = { .evict_inode = gfs2_evict_inode, .put_super = gfs2_put_super, .sync_fs = gfs2_sync_fs, - .freeze_fs = gfs2_freeze, - .unfreeze_fs = gfs2_unfreeze, + .freeze_super = gfs2_freeze, + .thaw_super = gfs2_unfreeze, .statfs = gfs2_statfs, .remount_fs = gfs2_remount_fs, .drop_inode = gfs2_drop_inode, diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 90e3322ffa1..73c97dccae2 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -45,6 +45,7 @@ extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, struct buffer_head *l_bh); extern int gfs2_statfs_sync(struct super_block *sb, int type); +extern void gfs2_freeze_func(struct work_struct *work); extern struct file_system_type gfs2_fs_type; extern struct file_system_type gfs2meta_fs_type; diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 42bfd336197..88bff243066 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -89,14 +89,17 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) { struct gfs2_trans *tr = current->journal_info; s64 nbuf; + int alloced = tr->tr_alloced; + BUG_ON(!tr); current->journal_info = NULL; if (!tr->tr_touched) { gfs2_log_release(sdp, tr->tr_reserved); - if (tr->tr_alloced) + if (alloced) { kfree(tr); - sb_end_intwrite(sdp->sd_vfs); + sb_end_intwrite(sdp->sd_vfs); + } return; } @@ -109,13 +112,14 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) gfs2_print_trans(tr); gfs2_log_commit(sdp, tr); - if (tr->tr_alloced && !tr->tr_attached) + if (alloced && !tr->tr_attached) kfree(tr); up_read(&sdp->sd_log_flush_lock); if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); - sb_end_intwrite(sdp->sd_vfs); + if (alloced) + sb_end_intwrite(sdp->sd_vfs); } static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl, @@ -192,6 +196,7 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) { struct gfs2_meta_header *mh; struct gfs2_trans *tr; + enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); tr = current->journal_info; tr->tr_touched = 1; @@ -205,6 +210,10 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) (unsigned long long)bd->bd_bh->b_blocknr); BUG(); } + if (unlikely(state == SFS_FROZEN)) { + printk(KERN_INFO "GFS2:adding buf while frozen\n"); + gfs2_assert_withdraw(sdp, 0); + } gfs2_pin(sdp, bd->bd_bh); mh->__pad0 = cpu_to_be64(0); mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index ff0316b925a..db458ee3a54 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c @@ -162,14 +162,16 @@ err2: */ int hfs_cat_keycmp(const btree_key *key1, const btree_key *key2) { - int retval; + __be32 k1p, k2p; - retval = be32_to_cpu(key1->cat.ParID) - be32_to_cpu(key2->cat.ParID); - if (!retval) - retval = hfs_strcmp(key1->cat.CName.name, key1->cat.CName.len, - key2->cat.CName.name, key2->cat.CName.len); + k1p = key1->cat.ParID; + k2p = key2->cat.ParID; - return retval; + if (k1p != k2p) + return be32_to_cpu(k1p) < be32_to_cpu(k2p) ? -1 : 1; + + return hfs_strcmp(key1->cat.CName.name, key1->cat.CName.len, + key2->cat.CName.name, key2->cat.CName.len); } /* Try to get a catalog entry for given catalog id */ diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 4338ff32959..5f2755117ce 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -548,10 +548,11 @@ struct hppfs_dirent { struct dentry *dentry; }; -static int hppfs_filldir(void *d, const char *name, int size, +static int hppfs_filldir(struct dir_context *ctx, const char *name, int size, loff_t offset, u64 inode, unsigned int type) { - struct hppfs_dirent *dirent = d; + struct hppfs_dirent *dirent = + container_of(ctx, struct hppfs_dirent, ctx); if (file_removed(dirent->dentry, name)) return 0; diff --git a/fs/inode.c b/fs/inode.c index 26753ba7b6d..2ed95f7caa4 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -143,9 +143,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_blocks = 0; inode->i_bytes = 0; inode->i_generation = 0; -#ifdef CONFIG_QUOTA - memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); -#endif inode->i_pipe = NULL; inode->i_bdev = NULL; inode->i_cdev = NULL; diff --git a/fs/ioctl.c b/fs/ioctl.c index 8ac3fad3619..77c9a781254 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -518,10 +518,12 @@ static int ioctl_fsfreeze(struct file *filp) return -EPERM; /* If filesystem doesn't support freeze feature, return. */ - if (sb->s_op->freeze_fs == NULL) + if (sb->s_op->freeze_fs == NULL && sb->s_op->freeze_super == NULL) return -EOPNOTSUPP; /* Freeze */ + if (sb->s_op->freeze_super) + return sb->s_op->freeze_super(sb); return freeze_super(sb); } @@ -533,6 +535,8 @@ static int ioctl_fsthaw(struct file *filp) return -EPERM; /* Thaw */ + if (sb->s_op->thaw_super) + return sb->s_op->thaw_super(sb); return thaw_super(sb); } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index fe839b91511..d67a16f2a45 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -174,27 +174,6 @@ struct iso9660_options{ * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hash_common(struct qstr *qstr, int ms) -{ - const char *name; - int len; - - len = qstr->len; - name = qstr->name; - if (ms) { - while (len && name[len-1] == '.') - len--; - } - - qstr->hash = full_name_hash(name, len); - - return 0; -} - -/* - * Compute the hash for the isofs name corresponding to the dentry. - */ -static int isofs_hashi_common(struct qstr *qstr, int ms) { const char *name; @@ -263,6 +242,27 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, } #ifdef CONFIG_JOLIET +/* + * Compute the hash for the isofs name corresponding to the dentry. + */ +static int +isofs_hash_common(struct qstr *qstr, int ms) +{ + const char *name; + int len; + + len = qstr->len; + name = qstr->name; + if (ms) { + while (len && name[len-1] == '.') + len--; + } + + qstr->hash = full_name_hash(name, len); + + return 0; +} + static int isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) { diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index aab8549591e..c46a79adb6a 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1373,8 +1373,7 @@ int journal_destroy(journal_t *journal) } mutex_unlock(&journal->j_checkpoint_mutex); - if (journal->j_inode) - iput(journal->j_inode); + iput(journal->j_inode); if (journal->j_revoke) journal_destroy_revoke(journal); kfree(journal->j_wbuf); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e4dc74713a4..1df94fabe4e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1853,13 +1853,12 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, journal->j_chksum_driver = NULL; return 0; } - } - /* Precompute checksum seed for all metadata */ - if (jbd2_journal_has_csum_v2or3(journal)) + /* Precompute checksum seed for all metadata */ journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); + } } /* If enabling v1 checksums, downgrade superblock */ diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index cf47f09e8ac..fa7e795bd8a 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -94,6 +94,9 @@ struct jfs_inode_info { unchar _inline_ea[128]; /* 128: inline extended attr */ } link; } u; +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; +#endif u32 dev; /* will die when we get wide dev_t */ struct inode vfs_inode; }; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index d59c7defb1e..38fdc533f4e 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -84,7 +84,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, struct inode *iplist[2]; struct tblock *tblk; - jfs_info("jfs_create: dip:0x%p name:%s", dip, dentry->d_name.name); + jfs_info("jfs_create: dip:0x%p name:%pd", dip, dentry); dquot_initialize(dip); @@ -216,7 +216,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) struct inode *iplist[2]; struct tblock *tblk; - jfs_info("jfs_mkdir: dip:0x%p name:%s", dip, dentry->d_name.name); + jfs_info("jfs_mkdir: dip:0x%p name:%pd", dip, dentry); dquot_initialize(dip); @@ -352,7 +352,7 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) struct inode *iplist[2]; struct tblock *tblk; - jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); + jfs_info("jfs_rmdir: dip:0x%p name:%pd", dip, dentry); /* Init inode for quota operations. */ dquot_initialize(dip); @@ -480,7 +480,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) s64 new_size = 0; int commit_flag; - jfs_info("jfs_unlink: dip:0x%p name:%s", dip, dentry->d_name.name); + jfs_info("jfs_unlink: dip:0x%p name:%pd", dip, dentry); /* Init inode for quota operations. */ dquot_initialize(dip); @@ -797,8 +797,7 @@ static int jfs_link(struct dentry *old_dentry, struct btstack btstack; struct inode *iplist[2]; - jfs_info("jfs_link: %s %s", old_dentry->d_name.name, - dentry->d_name.name); + jfs_info("jfs_link: %pd %pd", old_dentry, dentry); dquot_initialize(dir); @@ -1082,8 +1081,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, int commit_flag; - jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, - new_dentry->d_name.name); + jfs_info("jfs_rename: %pd %pd", old_dentry, new_dentry); dquot_initialize(old_dir); dquot_initialize(new_dir); @@ -1355,7 +1353,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - jfs_info("jfs_mknod: %s", dentry->d_name.name); + jfs_info("jfs_mknod: %pd", dentry); dquot_initialize(dir); @@ -1444,7 +1442,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, unsig struct component_name key; int rc; - jfs_info("jfs_lookup: name = %s", dentry->d_name.name); + jfs_info("jfs_lookup: name = %pd", dentry); if ((rc = get_UCSname(&key, dentry))) return ERR_PTR(rc); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 93e897e588a..16c3a955663 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -117,6 +117,9 @@ static struct inode *jfs_alloc_inode(struct super_block *sb) jfs_inode = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS); if (!jfs_inode) return NULL; +#ifdef CONFIG_QUOTA + memset(&jfs_inode->i_dquot, 0, sizeof(jfs_inode->i_dquot)); +#endif return &jfs_inode->vfs_inode; } @@ -537,6 +540,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_QUOTA sb->dq_op = &dquot_operations; sb->s_qcop = &dquot_quotactl_ops; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif /* @@ -836,6 +840,10 @@ out: return len - towrite; } +static struct dquot **jfs_get_dquots(struct inode *inode) +{ + return JFS_IP(inode)->i_dquot; +} #endif static const struct super_operations jfs_super_operations = { @@ -854,6 +862,7 @@ static const struct super_operations jfs_super_operations = { #ifdef CONFIG_QUOTA .quota_read = jfs_quota_read, .quota_write = jfs_quota_write, + .get_dquots = jfs_get_dquots, #endif }; diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 1c771931bb6..37989f02a22 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -807,7 +807,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, } /* instantiate and hash dentry */ - ret = d_materialise_unique(dentry, inode); + ret = d_splice_alias(inode, dentry); out_unlock: mutex_unlock(&kernfs_mutex); return ret; diff --git a/fs/libfs.c b/fs/libfs.c index 171d2846f2a..005843ce5db 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -114,18 +114,18 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) spin_lock(&dentry->d_lock); /* d_lock not required for cursor */ - list_del(&cursor->d_u.d_child); + list_del(&cursor->d_child); p = dentry->d_subdirs.next; while (n && p != &dentry->d_subdirs) { struct dentry *next; - next = list_entry(p, struct dentry, d_u.d_child); + next = list_entry(p, struct dentry, d_child); spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(next)) n--; spin_unlock(&next->d_lock); p = p->next; } - list_add_tail(&cursor->d_u.d_child, p); + list_add_tail(&cursor->d_child, p); spin_unlock(&dentry->d_lock); } } @@ -150,7 +150,7 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct dentry *cursor = file->private_data; - struct list_head *p, *q = &cursor->d_u.d_child; + struct list_head *p, *q = &cursor->d_child; if (!dir_emit_dots(file, ctx)) return 0; @@ -159,7 +159,7 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) list_move(q, &dentry->d_subdirs); for (p = q->next; p != &dentry->d_subdirs; p = p->next) { - struct dentry *next = list_entry(p, struct dentry, d_u.d_child); + struct dentry *next = list_entry(p, struct dentry, d_child); spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); if (!simple_positive(next)) { spin_unlock(&next->d_lock); @@ -287,7 +287,7 @@ int simple_empty(struct dentry *dentry) int ret = 0; spin_lock(&dentry->d_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &dentry->d_subdirs, d_child) { spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(child)) { spin_unlock(&child->d_lock); diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 13db95f5417..56598742dde 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -53,7 +53,7 @@ static const struct rpc_call_ops nlmsvc_grant_ops; static LIST_HEAD(nlm_blocked); static DEFINE_SPINLOCK(nlm_blocked_lock); -#ifdef LOCKD_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) { /* diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index b6f3b84b6e9..d12ff4e2dbe 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -408,7 +408,7 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file) { struct super_block *sb = datap; - return sb == file->f_file->f_path.dentry->d_sb; + return sb == file_inode(file->f_file)->i_sb; } /** diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 7cb751dfbee..00896010152 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -198,8 +198,8 @@ ncp_single_volume(struct ncp_server *server) static inline int ncp_is_server_root(struct inode *inode) { - return (!ncp_single_volume(NCP_SERVER(inode)) && - inode == inode->i_sb->s_root->d_inode); + return !ncp_single_volume(NCP_SERVER(inode)) && + is_root_inode(inode); } @@ -403,7 +403,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) /* If a pointer is invalid, we search the dentry. */ spin_lock(&parent->d_lock); - list_for_each_entry(dent, &parent->d_subdirs, d_u.d_child) { + list_for_each_entry(dent, &parent->d_subdirs, d_child) { if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) dget(dent); @@ -685,8 +685,7 @@ static void ncp_read_volume_list(struct file *file, struct dir_context *ctx, struct ncp_cache_control *ctl) { - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(file); struct ncp_server *server = NCP_SERVER(inode); struct ncp_volume_info info; struct ncp_entry_info entry; @@ -721,8 +720,7 @@ static void ncp_do_readdir(struct file *file, struct dir_context *ctx, struct ncp_cache_control *ctl) { - struct dentry *dentry = file->f_path.dentry; - struct inode *dir = dentry->d_inode; + struct inode *dir = file_inode(file); struct ncp_server *server = NCP_SERVER(dir); struct nw_search_sequence seq; struct ncp_entry_info entry; diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 77640a8bfb8..1dd7007f974 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -100,8 +100,7 @@ out: static ssize_t ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(file); size_t already_read = 0; off_t pos; size_t bufsize; @@ -109,7 +108,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) void* freepage; size_t freelen; - ncp_dbg(1, "enter %pd2\n", dentry); + ncp_dbg(1, "enter %pD2\n", file); pos = *ppos; @@ -167,7 +166,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) file_accessed(file); - ncp_dbg(1, "exit %pd2\n", dentry); + ncp_dbg(1, "exit %pD2\n", file); outrel: ncp_inode_close(inode); return already_read ? already_read : error; @@ -176,15 +175,14 @@ outrel: static ssize_t ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(file); size_t already_written = 0; off_t pos; size_t bufsize; int errno; void* bouncebuffer; - ncp_dbg(1, "enter %pd2\n", dentry); + ncp_dbg(1, "enter %pD2\n", file); if ((ssize_t) count < 0) return -EINVAL; pos = *ppos; @@ -263,7 +261,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * i_size_write(inode, pos); mutex_unlock(&inode->i_mutex); } - ncp_dbg(1, "exit %pd2\n", dentry); + ncp_dbg(1, "exit %pD2\n", file); outrel: ncp_inode_close(inode); return already_written ? already_written : errno; diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index d5659d96ee7..cf7e043a944 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -447,7 +447,6 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg result = -EIO; } } - result = 0; } mutex_unlock(&server->root_setup_lock); diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index b359d12eb35..33b873b259a 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -30,9 +30,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area, struct vm_fault *vmf) { - struct file *file = area->vm_file; - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(area->vm_file); char *pg_addr; unsigned int already_read; unsigned int count; diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 52cb19d66ec..b785f74bfe3 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -191,7 +191,7 @@ ncp_renew_dentries(struct dentry *parent) struct dentry *dentry; spin_lock(&parent->d_lock); - list_for_each_entry(dentry, &parent->d_subdirs, d_u.d_child) { + list_for_each_entry(dentry, &parent->d_subdirs, d_child) { if (dentry->d_fsdata == NULL) ncp_age_dentry(server, dentry); else @@ -207,7 +207,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) struct dentry *dentry; spin_lock(&parent->d_lock); - list_for_each_entry(dentry, &parent->d_subdirs, d_u.d_child) { + list_for_each_entry(dentry, &parent->d_subdirs, d_child) { dentry->d_fsdata = NULL; ncp_age_dentry(server, dentry); } diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 5228f201d3d..77fec6a55f5 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -378,7 +378,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) loff_t offset = header->args.offset; size_t count = header->args.count; struct page **pages = header->args.pages; - int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; + int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; unsigned int pg_len; struct blk_plug plug; int i; @@ -812,7 +812,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) /* Optimize common case that writes from 0 to end of file */ end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); - if (end != NFS_I(inode)->npages) { + if (end != inode->i_mapping->nrpages) { rcu_read_lock(); end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX); rcu_read_unlock(); diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c index e966c023b1b..dbe5839cdeb 100644 --- a/fs/nfs/blocklayout/rpc_pipefs.c +++ b/fs/nfs/blocklayout/rpc_pipefs.c @@ -65,17 +65,18 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); + mutex_lock(&nn->bl_mutex); bl_pipe_msg.bl_wq = &nn->bl_wq; b->simple.len += 4; /* single volume */ if (b->simple.len > PAGE_SIZE) - return -EIO; + goto out_unlock; memset(msg, 0, sizeof(*msg)); msg->len = sizeof(*bl_msg) + b->simple.len; msg->data = kzalloc(msg->len, gfp_mask); if (!msg->data) - goto out; + goto out_free_data; bl_msg = msg->data; bl_msg->type = BL_DEVICE_MOUNT, @@ -87,7 +88,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, rc = rpc_queue_upcall(nn->bl_device_pipe, msg); if (rc < 0) { remove_wait_queue(&nn->bl_wq, &wq); - goto out; + goto out_free_data; } set_current_state(TASK_UNINTERRUPTIBLE); @@ -97,19 +98,21 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, if (reply->status != BL_DEVICE_REQUEST_PROC) { printk(KERN_WARNING "%s failed to decode device: %d\n", __func__, reply->status); - goto out; + goto out_free_data; } dev = MKDEV(reply->major, reply->minor); -out: +out_free_data: kfree(msg->data); +out_unlock: + mutex_unlock(&nn->bl_mutex); return dev; } static ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { - struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, + struct nfs_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info, nfs_net_id); if (mlen != sizeof (struct bl_dev_msg)) @@ -232,6 +235,7 @@ static int nfs4blocklayout_net_init(struct net *net) struct nfs_net *nn = net_generic(net, nfs_net_id); struct dentry *dentry; + mutex_init(&nn->bl_mutex); init_waitqueue_head(&nn->bl_wq); nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); if (IS_ERR(nn->bl_device_pipe)) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 73466b93409..e36a9d78ea4 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -49,7 +49,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, goto out_iput; res->size = i_size_read(inode); res->change_attr = delegation->change_attr; - if (nfsi->npages != 0) + if (nfsi->nrequests != 0) res->change_attr++; res->ctime = inode->i_ctime; res->mtime = inode->i_mtime; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5853f53db73..7f3f6064134 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -125,6 +125,8 @@ again: continue; if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) continue; + if (!nfs4_valid_open_stateid(state)) + continue; if (!nfs4_stateid_match(&state->stateid, stateid)) continue; get_nfs_open_context(ctx); @@ -193,7 +195,11 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * { int res = 0; - res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync); + if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + res = nfs4_proc_delegreturn(inode, + delegation->cred, + &delegation->stateid, + issync); nfs_free_delegation(delegation); return res; } @@ -380,11 +386,13 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); - int err; + int err = 0; if (delegation == NULL) return 0; do { + if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + break; err = nfs_delegation_claim_opens(inode, &delegation->stateid); if (!issync || err != -EAGAIN) break; @@ -605,10 +613,23 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl rcu_read_unlock(); } +static void nfs_revoke_delegation(struct inode *inode) +{ + struct nfs_delegation *delegation; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL) { + set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); + nfs_mark_return_delegation(NFS_SERVER(inode), delegation); + } + rcu_read_unlock(); +} + void nfs_remove_bad_delegation(struct inode *inode) { struct nfs_delegation *delegation; + nfs_revoke_delegation(inode); delegation = nfs_inode_detach_delegation(inode); if (delegation) { nfs_inode_find_state_and_recover(inode, &delegation->stateid); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 5c1cce39297..e3c20a3ccc9 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -31,6 +31,7 @@ enum { NFS_DELEGATION_RETURN_IF_CLOSED, NFS_DELEGATION_REFERENCED, NFS_DELEGATION_RETURNING, + NFS_DELEGATION_REVOKED, }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 06e8cfcbb67..9b0c55cb2a2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -133,7 +133,7 @@ out: static int nfs_closedir(struct inode *inode, struct file *filp) { - put_nfs_open_dir_context(filp->f_path.dentry->d_inode, filp->private_data); + put_nfs_open_dir_context(file_inode(filp), filp->private_data); return 0; } @@ -499,7 +499,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) if (IS_ERR(inode)) goto out; - alias = d_materialise_unique(dentry, inode); + alias = d_splice_alias(inode, dentry); if (IS_ERR(alias)) goto out; else if (alias) { @@ -1393,7 +1393,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in nfs_advise_use_readdirplus(dir); no_entry: - res = d_materialise_unique(dentry, inode); + res = d_splice_alias(inode, dentry); if (res != NULL) { if (IS_ERR(res)) goto out_unblock_sillyrename; @@ -1527,6 +1527,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, case -ENOENT: d_drop(dentry); d_add(dentry, NULL); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); break; case -EISDIR: case -ENOTDIR: diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 20cffc83046..10bf07280f4 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -266,6 +266,7 @@ static void nfs_direct_req_free(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); + nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo); if (dreq->l_ctx != NULL) nfs_put_lock_context(dreq->l_ctx); if (dreq->ctx != NULL) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 46fab1cb455..7afb52f6a25 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -145,9 +145,6 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (state == NULL) - break; - nfs_remove_bad_delegation(state->inode); case -NFS4ERR_OPENMODE: if (state == NULL) break; diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 9bb806a76d9..bfecac781f1 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -204,8 +204,7 @@ destroy_ds(struct nfs4_pnfs_ds *ds) ifdebug(FACILITY) print_ds(ds); - if (ds->ds_clp) - nfs_put_client(ds->ds_clp); + nfs_put_client(ds->ds_clp); while (!list_empty(&ds->ds_addrs)) { da = list_first_entry(&ds->ds_addrs, diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 3ef01f0ba0b..d63bea8bbfb 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -269,8 +269,8 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp) if (!fscache_maybe_release_page(cookie, page, gfp)) return 0; - nfs_add_fscache_stats(page->mapping->host, - NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED); } return 1; @@ -293,8 +293,8 @@ void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode) BUG_ON(!PageLocked(page)); fscache_uncache_page(cookie, page); - nfs_add_fscache_stats(page->mapping->host, - NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED); } /* @@ -343,19 +343,19 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, case 0: /* read BIO submitted (page in fscache) */ dfprintk(FSCACHE, "NFS: readpage_from_fscache: BIO submitted\n"); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK); return ret; case -ENOBUFS: /* inode not in cache */ case -ENODATA: /* page not in cache */ - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); return 1; default: dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); } return ret; } @@ -429,11 +429,11 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) if (ret != 0) { fscache_uncache_page(nfs_i_fscache(inode), page); - nfs_add_fscache_stats(inode, - NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED); } else { - nfs_add_fscache_stats(inode, - NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1); + nfs_inc_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_OK); } } diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 880618a8b04..9ac3846cb59 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -51,14 +51,14 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i /* * Ensure that this dentry is invisible to d_find_alias(). * Otherwise, it may be spliced into the tree by - * d_materialise_unique if a parent directory from the same + * d_splice_alias if a parent directory from the same * filesystem gets mounted at a later time. * This again causes shrink_dcache_for_umount_subtree() to * Oops, since the test for IS_ROOT() will fail. */ spin_lock(&sb->s_root->d_inode->i_lock); spin_lock(&sb->s_root->d_lock); - hlist_del_init(&sb->s_root->d_alias); + hlist_del_init(&sb->s_root->d_u.d_alias); spin_unlock(&sb->s_root->d_lock); spin_unlock(&sb->s_root->d_inode->i_lock); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6388a59f2ad..4bffe637ea3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -192,6 +192,7 @@ void nfs_zap_caches(struct inode *inode) nfs_zap_caches_locked(inode); spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_zap_caches); void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) { @@ -626,7 +627,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; - int err; + int err = 0; trace_nfs_getattr_enter(inode); /* Flush out writes to the server in order to update c/mtime. */ @@ -1149,7 +1150,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) - && nfsi->npages == 0) { + && nfsi->nrequests == 0) { i_size_write(inode, nfs_size_to_loff_t(fattr->size)); ret |= NFS_INO_INVALID_ATTR; } @@ -1192,7 +1193,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize && nfsi->npages == 0) + if (cur_size != new_isize && nfsi->nrequests == 0) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; } @@ -1619,7 +1620,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (new_isize != cur_isize) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ - if ((nfsi->npages == 0) || new_isize > cur_isize) { + if ((nfsi->nrequests == 0) || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; invalid &= ~NFS_INO_REVAL_PAGECACHE; @@ -1784,7 +1785,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_LIST_HEAD(&nfsi->commit_info.list); - nfsi->npages = 0; + nfsi->nrequests = 0; nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->commit_info.rpcs_out, 0); atomic_set(&nfsi->silly_count, 1); diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index c5832487c45..0cb806fbd4c 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -55,6 +55,11 @@ static inline void nfs_add_fscache_stats(struct inode *inode, { this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend); } +static inline void nfs_inc_fscache_stats(struct inode *inode, + enum nfs_stat_fscachecounters stat) +{ + this_cpu_inc(NFS_SERVER(inode)->io_stats->fscache[stat]); +} #endif static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index ef221fb8a18..f0e06e4acbe 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -19,6 +19,7 @@ struct nfs_net { struct rpc_pipe *bl_device_pipe; struct bl_dev_msg bl_mount_reply; wait_queue_head_t bl_wq; + struct mutex bl_mutex; struct list_head nfs_client_list; struct list_head nfs_volume_list; #if IS_ENABLED(CONFIG_NFS_V4) diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index d10333a197b..7afb8947dfd 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -6,6 +6,8 @@ #define __LINUX_FS_NFS_NFS4_2_H /* nfs4.2proc.c */ +int nfs42_proc_allocate(struct file *, loff_t, loff_t); +int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); /* nfs4.2xdr.h */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 0886f1db591..cb170722769 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -32,6 +32,81 @@ static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, return ret; } +static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + loff_t offset, loff_t len) +{ + struct inode *inode = file_inode(filep); + struct nfs42_falloc_args args = { + .falloc_fh = NFS_FH(inode), + .falloc_offset = offset, + .falloc_length = len, + }; + struct nfs42_falloc_res res; + struct nfs_server *server = NFS_SERVER(inode); + int status; + + msg->rpc_argp = &args; + msg->rpc_resp = &res; + + status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE); + if (status) + return status; + + return nfs4_call_sync(server->client, server, msg, + &args.seq_args, &res.seq_res, 0); +} + +static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + loff_t offset, loff_t len) +{ + struct nfs_server *server = NFS_SERVER(file_inode(filep)); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs42_proc_fallocate(msg, filep, offset, len); + if (err == -ENOTSUPP) + return -EOPNOTSUPP; + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + + return err; +} + +int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE], + }; + struct inode *inode = file_inode(filep); + int err; + + if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE)) + return -EOPNOTSUPP; + + err = nfs42_proc_fallocate(&msg, filep, offset, len); + if (err == -EOPNOTSUPP) + NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE; + return err; +} + +int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DEALLOCATE], + }; + struct inode *inode = file_inode(filep); + int err; + + if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE)) + return -EOPNOTSUPP; + + err = nfs42_proc_fallocate(&msg, filep, offset, len); + if (err == -EOPNOTSUPP) + NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE; + return err; +} + loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) { struct inode *inode = file_inode(filep); @@ -50,7 +125,7 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) struct nfs_server *server = NFS_SERVER(inode); int status; - if (!(server->caps & NFS_CAP_SEEK)) + if (!nfs_server_capable(inode, NFS_CAP_SEEK)) return -ENOTSUPP; status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ); diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index c90469b604b..038a7e1521f 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -4,6 +4,15 @@ #ifndef __LINUX_FS_NFS_NFS4_2XDR_H #define __LINUX_FS_NFS_NFS4_2XDR_H +#define encode_fallocate_maxsz (encode_stateid_maxsz + \ + 2 /* offset */ + \ + 2 /* length */) +#define encode_allocate_maxsz (op_encode_hdr_maxsz + \ + encode_fallocate_maxsz) +#define decode_allocate_maxsz (op_decode_hdr_maxsz) +#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ + encode_fallocate_maxsz) +#define decode_deallocate_maxsz (op_decode_hdr_maxsz) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ @@ -14,6 +23,18 @@ 2 /* offset */ + \ 2 /* length */) +#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_allocate_maxsz) +#define NFS4_dec_allocate_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_allocate_maxsz) +#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_deallocate_maxsz) +#define NFS4_dec_deallocate_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_deallocate_maxsz) #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_seek_maxsz) @@ -22,6 +43,30 @@ decode_seek_maxsz) +static void encode_fallocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + encode_nfs4_stateid(xdr, &args->falloc_stateid); + encode_uint64(xdr, args->falloc_offset); + encode_uint64(xdr, args->falloc_length); +} + +static void encode_allocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_ALLOCATE, decode_allocate_maxsz, hdr); + encode_fallocate(xdr, args); +} + +static void encode_deallocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_DEALLOCATE, decode_deallocate_maxsz, hdr); + encode_fallocate(xdr, args); +} + static void encode_seek(struct xdr_stream *xdr, struct nfs42_seek_args *args, struct compound_hdr *hdr) @@ -33,6 +78,42 @@ static void encode_seek(struct xdr_stream *xdr, } /* + * Encode ALLOCATE request + */ +static void nfs4_xdr_enc_allocate(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->falloc_fh, &hdr); + encode_allocate(xdr, args, &hdr); + encode_nops(&hdr); +} + +/* + * Encode DEALLOCATE request + */ +static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->falloc_fh, &hdr); + encode_deallocate(xdr, args, &hdr); + encode_nops(&hdr); +} + +/* * Encode SEEK request */ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, @@ -50,6 +131,16 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, encode_nops(&hdr); } +static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) +{ + return decode_op_hdr(xdr, OP_ALLOCATE); +} + +static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) +{ + return decode_op_hdr(xdr, OP_DEALLOCATE); +} + static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) { int status; @@ -73,6 +164,54 @@ out_overflow: } /* + * Decode ALLOCATE request + */ +static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_falloc_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_allocate(xdr, res); +out: + return status; +} + +/* + * Decode DEALLOCATE request + */ +static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_falloc_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_deallocate(xdr, res); +out: + return status; +} + +/* * Decode SEEK request */ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index be6cac37ea1..a08178764cf 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -226,6 +226,7 @@ int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); /* nfs4proc.c */ +extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index ffdb28d86cf..03311259b0c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -241,28 +241,25 @@ void nfs4_free_client(struct nfs_client *clp) */ static int nfs4_init_callback(struct nfs_client *clp) { + struct rpc_xprt *xprt; int error; - if (clp->rpc_ops->version == 4) { - struct rpc_xprt *xprt; + xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - - if (nfs4_has_session(clp)) { - error = xprt_setup_backchannel(xprt, - NFS41_BC_MIN_CALLBACKS); - if (error < 0) - return error; - } - - error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); - if (error < 0) { - dprintk("%s: failed to start callback. Error = %d\n", - __func__, error); + if (nfs4_has_session(clp)) { + error = xprt_setup_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); + if (error < 0) return error; - } - __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); } + + error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); + if (error < 0) { + dprintk("%s: failed to start callback. Error = %d\n", + __func__, error); + return error; + } + __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + return 0; } @@ -498,8 +495,7 @@ int nfs40_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs_wait_client_init_complete(pos); @@ -517,8 +513,7 @@ int nfs40_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs4_proc_setclientid_confirm(pos, &clid, cred); @@ -549,8 +544,7 @@ int nfs40_walk_client_list(struct nfs_client *new, /* No match found. The server lost our clientid */ out: - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); dprintk("NFS: <-- %s status = %d\n", __func__, status); return status; } @@ -641,8 +635,7 @@ int nfs41_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs_wait_client_init_complete(pos); @@ -675,8 +668,7 @@ int nfs41_walk_client_list(struct nfs_client *new, /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s status = %d\n", __func__, status); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); return status; } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index c51fb4db9bf..8b46389c4c5 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -3,6 +3,8 @@ * * Copyright (C) 1992 Rick Sladkey */ +#include <linux/fs.h> +#include <linux/falloc.h> #include <linux/nfs_fs.h> #include "internal.h" #include "fscache.h" @@ -134,6 +136,32 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) return nfs_file_llseek(filep, offset, whence); } } + +static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t len) +{ + struct inode *inode = file_inode(filep); + long ret; + + if (!S_ISREG(inode->i_mode)) + return -EOPNOTSUPP; + + if ((mode != 0) && (mode != (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE))) + return -EOPNOTSUPP; + + ret = inode_newsize_ok(inode, offset + len); + if (ret < 0) + return ret; + + mutex_lock(&inode->i_mutex); + if (mode & FALLOC_FL_PUNCH_HOLE) + ret = nfs42_proc_deallocate(filep, offset, len); + else + ret = nfs42_proc_allocate(filep, offset, len); + mutex_unlock(&inode->i_mutex); + + nfs_zap_caches(inode); + return ret; +} #endif /* CONFIG_NFS_V4_2 */ const struct file_operations nfs4_file_operations = { @@ -155,6 +183,9 @@ const struct file_operations nfs4_file_operations = { .flock = nfs_flock, .splice_read = nfs_file_splice_read, .splice_write = iter_file_splice_write, +#ifdef CONFIG_NFS_V4_2 + .fallocate = nfs42_fallocate, +#endif /* CONFIG_NFS_V4_2 */ .check_flags = nfs_check_flags, .setlease = simple_nosetlease, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 405bd95c1f5..e7f8d5ff258 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -158,8 +158,6 @@ static int nfs4_map_errors(int err) return -EACCES; case -NFS4ERR_MINOR_VERS_MISMATCH: return -EPROTONOSUPPORT; - case -NFS4ERR_ACCESS: - return -EACCES; case -NFS4ERR_FILE_OPEN: return -EBUSY; default: @@ -344,7 +342,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; @@ -370,11 +368,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) { - nfs_remove_bad_delegation(inode); - exception->retry = 1; - break; - } if (state == NULL) break; ret = nfs4_schedule_stateid_recovery(server, state); @@ -1654,7 +1647,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs_inode_find_state_and_recover(state->inode, stateid); nfs4_schedule_stateid_recovery(server, state); - return 0; + return -EAGAIN; case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: set_bit(NFS_DELEGATED_STATE, &state->flags); @@ -2109,46 +2102,60 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } +static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state) +{ + nfs_remove_bad_delegation(state->inode); + write_seqlock(&state->seqlock); + nfs4_stateid_copy(&state->stateid, &state->open_stateid); + write_sequnlock(&state->seqlock); + clear_bit(NFS_DELEGATED_STATE, &state->flags); +} + +static void nfs40_clear_delegation_stateid(struct nfs4_state *state) +{ + if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL) + nfs_finish_clear_delegation_stateid(state); +} + +static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + /* NFSv4.0 doesn't allow for delegation recovery on open expire */ + nfs40_clear_delegation_stateid(state); + return nfs4_open_expired(sp, state); +} + #if defined(CONFIG_NFS_V4_1) -static void nfs41_clear_delegation_stateid(struct nfs4_state *state) +static void nfs41_check_delegation_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); - nfs4_stateid *stateid = &state->stateid; + nfs4_stateid stateid; struct nfs_delegation *delegation; - struct rpc_cred *cred = NULL; - int status = -NFS4ERR_BAD_STATEID; - - /* If a state reset has been done, test_stateid is unneeded */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) - return; + struct rpc_cred *cred; + int status; /* Get the delegation credential for use by test/free_stateid */ rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation != NULL && - nfs4_stateid_match(&delegation->stateid, stateid)) { - cred = get_rpccred(delegation->cred); - rcu_read_unlock(); - status = nfs41_test_stateid(server, stateid, cred); - trace_nfs4_test_delegation_stateid(state, NULL, status); - } else + if (delegation == NULL) { rcu_read_unlock(); + return; + } + + nfs4_stateid_copy(&stateid, &delegation->stateid); + cred = get_rpccred(delegation->cred); + rcu_read_unlock(); + status = nfs41_test_stateid(server, &stateid, cred); + trace_nfs4_test_delegation_stateid(state, NULL, status); if (status != NFS_OK) { /* Free the stateid unless the server explicitly * informs us the stateid is unrecognized. */ if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, stateid, cred); - nfs_remove_bad_delegation(state->inode); - - write_seqlock(&state->seqlock); - nfs4_stateid_copy(&state->stateid, &state->open_stateid); - write_sequnlock(&state->seqlock); - clear_bit(NFS_DELEGATED_STATE, &state->flags); + nfs41_free_stateid(server, &stateid, cred); + nfs_finish_clear_delegation_stateid(state); } - if (cred != NULL) - put_rpccred(cred); + put_rpccred(cred); } /** @@ -2192,7 +2199,7 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st { int status; - nfs41_clear_delegation_stateid(state); + nfs41_check_delegation_stateid(state); status = nfs41_check_open_stateid(state); if (status != NFS_OK) status = nfs4_open_expired(sp, state); @@ -2231,19 +2238,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); ret = _nfs4_proc_open(opendata); - if (ret != 0) { - if (ret == -ENOENT) { - dentry = opendata->dentry; - if (dentry->d_inode) - d_delete(dentry); - else if (d_unhashed(dentry)) - d_add(dentry, NULL); - - nfs_set_verifier(dentry, - nfs_save_change_attribute(opendata->dir->d_inode)); - } + if (ret != 0) goto out; - } state = nfs4_opendata_to_nfs4_state(opendata); ret = PTR_ERR(state); @@ -4841,9 +4837,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (state == NULL) - break; - nfs_remove_bad_delegation(state->inode); case -NFS4ERR_OPENMODE: if (state == NULL) break; @@ -7709,6 +7702,9 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) dprintk("--> %s\n", __func__); + /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ + pnfs_get_layout_hdr(NFS_I(inode)->layout); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); @@ -7721,9 +7717,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->res.seq_res.sr_slot = NULL; nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); - /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ - pnfs_get_layout_hdr(NFS_I(inode)->layout); - task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return ERR_CAST(task); @@ -8341,7 +8334,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, - .recover_open = nfs4_open_expired, + .recover_open = nfs40_open_expired, .recover_lock = nfs4_lock_expired, .establish_clid = nfs4_init_clientid, }; @@ -8408,8 +8401,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 - | NFS_CAP_ATOMIC_OPEN_V1 - | NFS_CAP_SEEK, + | NFS_CAP_ATOMIC_OPEN_V1, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, @@ -8431,7 +8423,10 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 - | NFS_CAP_ATOMIC_OPEN_V1, + | NFS_CAP_ATOMIC_OPEN_V1 + | NFS_CAP_ALLOCATE + | NFS_CAP_DEALLOCATE + | NFS_CAP_SEEK, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 206c08a60c7..cb4376b78ed 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -141,13 +141,15 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \ 1 /* sc_prog */ + \ - XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ - XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ 1) /* sc_cb_ident */ #define decode_setclientid_maxsz \ (op_decode_hdr_maxsz + \ - 2 + \ - 1024) /* large value for CLID_INUSE */ + 2 /* clientid */ + \ + XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) #define encode_setclientid_confirm_maxsz \ (op_encode_hdr_maxsz + \ 3 + (NFS4_VERIFIER_SIZE >> 2)) @@ -7394,6 +7396,8 @@ struct rpc_procinfo nfs4_procedures[] = { #endif /* CONFIG_NFS_V4_1 */ #ifdef CONFIG_NFS_V4_2 PROC(SEEK, enc_seek, dec_seek), + PROC(ALLOCATE, enc_allocate, dec_allocate), + PROC(DEALLOCATE, enc_deallocate, dec_deallocate), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ed0db61f854..2b5e769beb1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -258,6 +258,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) static inline void nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) { + struct inode *inode; WARN_ON_ONCE(prev == req); if (!prev) { @@ -276,12 +277,16 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) * nfs_page_group_destroy is called */ kref_get(&req->wb_head->wb_kref); - /* grab extra ref if head request has extra ref from - * the write/commit path to handle handoff between write - * and commit lists */ + /* grab extra ref and bump the request count if head request + * has extra ref from the write/commit path to handle handoff + * between write and commit lists. */ if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { + inode = page_file_mapping(req->wb_page)->host; set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); + spin_lock(&inode->i_lock); + NFS_I(inode)->nrequests++; + spin_unlock(&inode->i_lock); } } } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index beff2769c5c..c91a4799c56 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -269,7 +269,7 @@ int nfs_readpage(struct file *file, struct page *page) dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", page, PAGE_CACHE_SIZE, page_file_index(page)); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); - nfs_add_stats(inode, NFSIOS_READPAGES, 1); + nfs_inc_stats(inode, NFSIOS_READPAGES); /* * Try to flush any pending writes to the file.. diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 12493846a2d..af3af685a9e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -575,7 +575,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st int ret; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); - nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); + nfs_inc_stats(inode, NFSIOS_WRITEPAGES); nfs_pageio_cond_complete(pgio, page_file_index(page)); ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); @@ -670,7 +670,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfs_lock_request(req); spin_lock(&inode->i_lock); - if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + if (!nfsi->nrequests && + NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; /* * Swap-space should not get truncated. Hence no need to plug the race @@ -681,9 +682,11 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); } - nfsi->npages++; + nfsi->nrequests++; /* this a head request for a page group - mark it as having an - * extra reference so sub groups can follow suit */ + * extra reference so sub groups can follow suit. + * This flag also informs pgio layer when to bump nrequests when + * adding subrequests. */ WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); @@ -709,14 +712,16 @@ static void nfs_inode_remove_request(struct nfs_page *req) wake_up_page(head->wb_page, PG_private); clear_bit(PG_MAPPED, &head->wb_flags); } - nfsi->npages--; + nfsi->nrequests--; + spin_unlock(&inode->i_lock); + } else { + spin_lock(&inode->i_lock); + nfsi->nrequests--; spin_unlock(&inode->i_lock); } if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) nfs_release_request(req); - else - WARN_ON_ONCE(1); } static void @@ -1737,7 +1742,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr /* Don't commit yet if this is a non-blocking flush and there * are a lot of outstanding writes for this mapping. */ - if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1)) + if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1)) goto out_mark_dirty; /* don't wait for the COMMIT response */ diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ed2b1151b17..7cbdf1b2e4a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -774,8 +774,12 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) { if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); - dprintk("%s slot is busy\n", __func__); - return false; + /* Race breaker */ + if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { + dprintk("%s slot is busy\n", __func__); + return false; + } + rpc_wake_up_queued_task(&clp->cl_cb_waitq, task); } return true; } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index a25490ae6c6..cc6a7607200 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -245,10 +245,11 @@ struct nfs4_dir_ctx { }; static int -nfsd4_build_namelist(void *arg, const char *name, int namlen, +nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { - struct nfs4_dir_ctx *ctx = arg; + struct nfs4_dir_ctx *ctx = + container_of(__ctx, struct nfs4_dir_ctx, ctx); struct name_list *entry; if (namlen != HEXDIR_LEN - 1) @@ -704,7 +705,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) struct cld_upcall *tmp, *cup; struct cld_msg __user *cmsg = (struct cld_msg __user *)src; uint32_t xid; - struct nfsd_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, + struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info, nfsd_net_id); struct cld_net *cn = nn->cld_net; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e9c3afe4b5d..4e1d7268b00 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -41,7 +41,7 @@ #include <linux/ratelimit.h> #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/addr.h> -#include <linux/hash.h> +#include <linux/jhash.h> #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -594,7 +594,7 @@ static int delegation_blocked(struct knfsd_fh *fh) } spin_unlock(&blocked_delegations_lock); } - hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); + hash = jhash(&fh->fh_base, fh->fh_size, 0); if (test_bit(hash&255, bd->set[0]) && test_bit((hash>>8)&255, bd->set[0]) && test_bit((hash>>16)&255, bd->set[0])) @@ -613,7 +613,7 @@ static void block_delegations(struct knfsd_fh *fh) u32 hash; struct bloom_pair *bd = &blocked_delegations; - hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); + hash = jhash(&fh->fh_base, fh->fh_size, 0); spin_lock(&blocked_delegations_lock); __set_bit(hash&255, bd->set[bd->new]); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index eeea7a90eb8..b1eed4dd2ea 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1886,7 +1886,7 @@ static __be32 nfsd4_encode_path(struct xdr_stream *xdr, goto out_free; } p = xdr_encode_opaque(p, dentry->d_name.name, len); - dprintk("/%s", dentry->d_name.name); + dprintk("/%pd", dentry); spin_unlock(&dentry->d_lock); dput(dentry); ncomponents--; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index ca73ca79a0e..9506ea56561 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -231,6 +231,10 @@ static struct file_operations reply_cache_stats_operations = { * payload - write methods */ +static inline struct net *netns(struct file *file) +{ + return file_inode(file)->i_sb->s_fs_info; +} /** * write_unlock_ip - Release all locks used by a client @@ -252,7 +256,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) struct sockaddr *sap = (struct sockaddr *)&address; size_t salen = sizeof(address); char *fo_path; - struct net *net = file->f_dentry->d_sb->s_fs_info; + struct net *net = netns(file); /* sanity check */ if (size == 0) @@ -350,7 +354,6 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) int len; struct auth_domain *dom; struct knfsd_fh fh; - struct net *net = file->f_dentry->d_sb->s_fs_info; if (size == 0) return -EINVAL; @@ -385,7 +388,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (!dom) return -ENOMEM; - len = exp_rootfh(net, dom, path, &fh, maxsize); + len = exp_rootfh(netns(file), dom, path, &fh, maxsize); auth_domain_put(dom); if (len) return len; @@ -429,7 +432,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) { char *mesg = buf; int rv; - struct net *net = file->f_dentry->d_sb->s_fs_info; + struct net *net = netns(file); if (size > 0) { int newthreads; @@ -480,7 +483,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) int len; int npools; int *nthreads; - struct net *net = file->f_dentry->d_sb->s_fs_info; + struct net *net = netns(file); mutex_lock(&nfsd_mutex); npools = nfsd_nrpools(net); @@ -543,8 +546,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) unsigned minor; ssize_t tlen = 0; char *sep; - struct net *net = file->f_dentry->d_sb->s_fs_info; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); if (size>0) { if (nn->nfsd_serv) @@ -830,10 +832,9 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size, static ssize_t write_ports(struct file *file, char *buf, size_t size) { ssize_t rv; - struct net *net = file->f_dentry->d_sb->s_fs_info; mutex_lock(&nfsd_mutex); - rv = __write_ports(file, buf, size, net); + rv = __write_ports(file, buf, size, netns(file)); mutex_unlock(&nfsd_mutex); return rv; } @@ -865,8 +866,7 @@ int nfsd_max_blksize; static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) { char *mesg = buf; - struct net *net = file->f_dentry->d_sb->s_fs_info; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); if (size > 0) { int bsize; @@ -915,8 +915,7 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) static ssize_t write_maxconn(struct file *file, char *buf, size_t size) { char *mesg = buf; - struct net *net = file->f_dentry->d_sb->s_fs_info; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); unsigned int maxconn = nn->max_connections; if (size > 0) { @@ -997,8 +996,7 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, */ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) { - struct net *net = file->f_dentry->d_sb->s_fs_info; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } @@ -1014,8 +1012,7 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) */ static ssize_t write_gracetime(struct file *file, char *buf, size_t size) { - struct net *net = file->f_dentry->d_sb->s_fs_info; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn); } @@ -1071,8 +1068,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) { ssize_t rv; - struct net *net = file->f_dentry->d_sb->s_fs_info; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); mutex_lock(&nfsd_mutex); rv = __write_recoverydir(file, buf, size, nn); @@ -1102,8 +1098,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) */ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) { - struct net *net = file->f_dentry->d_sb->s_fs_info; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); if (size > 0) { switch(buf[0]) { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 747f3b95bd1..33a46a8dfaf 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -335,12 +335,15 @@ void nfsd_lockd_shutdown(void); (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) #ifdef CONFIG_NFSD_V4_SECURITY_LABEL -#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ - (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) +#define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL #else -#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 +#define NFSD4_2_SECURITY_ATTRS 0 #endif +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ + (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ + NFSD4_2_SECURITY_ATTRS) + static inline u32 nfsd_suppattrs0(u32 minorversion) { return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 989129e2d6e..0a82e3c033e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -930,7 +930,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, unsigned long *cnt, int *stablep) { struct svc_export *exp; - struct dentry *dentry; struct inode *inode; mm_segment_t oldfs; __be32 err = 0; @@ -949,8 +948,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, */ current->flags |= PF_LESS_THROTTLE; - dentry = file->f_path.dentry; - inode = dentry->d_inode; + inode = file_inode(file); exp = fhp->fh_export; use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); @@ -1819,10 +1817,12 @@ struct readdir_data { int full; }; -static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type) +static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int d_type) { - struct readdir_data *buf = __buf; + struct readdir_data *buf = + container_of(ctx, struct readdir_data, ctx); struct buffered_dirent *de = (void *)(buf->dirent + buf->used); unsigned int reclen; @@ -1842,7 +1842,7 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, return 0; } -static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, +static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, struct readdir_cd *cdp, loff_t *offsetp) { struct buffered_dirent *de; @@ -1926,7 +1926,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, */ __be32 nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, - struct readdir_cd *cdp, filldir_t func) + struct readdir_cd *cdp, nfsd_filldir_t func) { __be32 err; struct file *file; diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index c2ff3f14e5f..b1796d6ee53 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -36,7 +36,7 @@ /* * Callback function for readdir */ -typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); +typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned); /* nfsd/vfs.c */ int nfsd_racache_init(int); @@ -95,7 +95,7 @@ __be32 nfsd_rename(struct svc_rqst *, __be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, char *name, int len); __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, - loff_t *, struct readdir_cd *, filldir_t); + loff_t *, struct readdir_cd *, nfsd_filldir_t); __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, struct kstatfs *, int access); diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index e9e3325f29f..3a03e0aea1f 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -39,21 +39,15 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ struct the_nilfs *nilfs; struct inode *inode = file->f_mapping->host; - int err; - - err = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (err) - return err; - mutex_lock(&inode->i_mutex); + int err = 0; if (nilfs_inode_dirty(inode)) { if (datasync) err = nilfs_construct_dsync_segment(inode->i_sb, inode, - 0, LLONG_MAX); + start, end); else err = nilfs_construct_segment(inode->i_sb); } - mutex_unlock(&inode->i_mutex); nilfs = inode->i_sb->s_fs_info; if (!err) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index e1fa69b341b..8b5969538f3 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -49,6 +49,8 @@ struct nilfs_iget_args { int for_gc; }; +static int nilfs_iget_test(struct inode *inode, void *opaque); + void nilfs_inode_add_blocks(struct inode *inode, int n) { struct nilfs_root *root = NILFS_I(inode)->i_root; @@ -348,6 +350,17 @@ const struct address_space_operations nilfs_aops = { .is_partially_uptodate = block_is_partially_uptodate, }; +static int nilfs_insert_inode_locked(struct inode *inode, + struct nilfs_root *root, + unsigned long ino) +{ + struct nilfs_iget_args args = { + .ino = ino, .root = root, .cno = 0, .for_gc = 0 + }; + + return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); +} + struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; @@ -383,7 +396,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { err = nilfs_bmap_read(ii->i_bmap, NULL); if (err < 0) - goto failed_bmap; + goto failed_after_creation; set_bit(NILFS_I_BMAP, &ii->i_state); /* No lock is needed; iget() ensures it. */ @@ -399,21 +412,24 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) spin_lock(&nilfs->ns_next_gen_lock); inode->i_generation = nilfs->ns_next_generation++; spin_unlock(&nilfs->ns_next_gen_lock); - insert_inode_hash(inode); + if (nilfs_insert_inode_locked(inode, root, ino) < 0) { + err = -EIO; + goto failed_after_creation; + } err = nilfs_init_acl(inode, dir); if (unlikely(err)) - goto failed_acl; /* never occur. When supporting + goto failed_after_creation; /* never occur. When supporting nilfs_init_acl(), proper cancellation of above jobs should be considered */ return inode; - failed_acl: - failed_bmap: + failed_after_creation: clear_nlink(inode); + unlock_new_inode(inode); iput(inode); /* raw_inode will be deleted through - generic_delete_inode() */ + nilfs_evict_inode() */ goto failed; failed_ifile_create_inode: @@ -461,8 +477,8 @@ int nilfs_read_inode_common(struct inode *inode, inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - if (inode->i_nlink == 0 && inode->i_mode == 0) - return -EINVAL; /* this inode is deleted */ + if (inode->i_nlink == 0) + return -ESTALE; /* this inode is deleted */ inode->i_blocks = le64_to_cpu(raw_inode->i_blocks); ii->i_flags = le32_to_cpu(raw_inode->i_flags); diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 9de78f08989..0f84b257932 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -51,9 +51,11 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) int err = nilfs_add_link(dentry, inode); if (!err) { d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; } inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -182,6 +184,7 @@ out: out_fail: drop_nlink(inode); nilfs_mark_inode_dirty(inode); + unlock_new_inode(inode); iput(inode); goto out; } @@ -201,11 +204,15 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, inode_inc_link_count(inode); ihold(inode); - err = nilfs_add_nondir(dentry, inode); - if (!err) + err = nilfs_add_link(dentry, inode); + if (!err) { + d_instantiate(dentry, inode); err = nilfs_transaction_commit(dir->i_sb); - else + } else { + inode_dec_link_count(inode); + iput(inode); nilfs_transaction_abort(dir->i_sb); + } return err; } @@ -243,6 +250,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) nilfs_mark_inode_dirty(inode); d_instantiate(dentry, inode); + unlock_new_inode(inode); out: if (!err) err = nilfs_transaction_commit(dir->i_sb); @@ -255,6 +263,7 @@ out_fail: drop_nlink(inode); drop_nlink(inode); nilfs_mark_inode_dirty(inode); + unlock_new_inode(inode); iput(inode); out_dir: drop_nlink(dir); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 9da25fe9ea6..69bd801afb5 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -808,8 +808,7 @@ void nilfs_put_root(struct nilfs_root *root) spin_lock(&nilfs->ns_cptree_lock); rb_erase(&root->rb_node, &nilfs->ns_cptree); spin_unlock(&nilfs->ns_cptree_lock); - if (root->ifile) - iput(root->ifile); + iput(root->ifile); kfree(root); } diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 9d7e2b9659c..6ffd220eb14 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -20,25 +20,24 @@ #if defined(CONFIG_INOTIFY_USER) || defined(CONFIG_FANOTIFY) -static int show_fdinfo(struct seq_file *m, struct file *f, - int (*show)(struct seq_file *m, struct fsnotify_mark *mark)) +static void show_fdinfo(struct seq_file *m, struct file *f, + void (*show)(struct seq_file *m, + struct fsnotify_mark *mark)) { struct fsnotify_group *group = f->private_data; struct fsnotify_mark *mark; - int ret = 0; mutex_lock(&group->mark_mutex); list_for_each_entry(mark, &group->marks_list, g_list) { - ret = show(m, mark); - if (ret) + show(m, mark); + if (seq_has_overflowed(m)) break; } mutex_unlock(&group->mark_mutex); - return ret; } #if defined(CONFIG_EXPORTFS) -static int show_mark_fhandle(struct seq_file *m, struct inode *inode) +static void show_mark_fhandle(struct seq_file *m, struct inode *inode) { struct { struct file_handle handle; @@ -52,71 +51,62 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode) ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0); if ((ret == FILEID_INVALID) || (ret < 0)) { WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); - return 0; + return; } f.handle.handle_type = ret; f.handle.handle_bytes = size * sizeof(u32); - ret = seq_printf(m, "fhandle-bytes:%x fhandle-type:%x f_handle:", - f.handle.handle_bytes, f.handle.handle_type); + seq_printf(m, "fhandle-bytes:%x fhandle-type:%x f_handle:", + f.handle.handle_bytes, f.handle.handle_type); for (i = 0; i < f.handle.handle_bytes; i++) - ret |= seq_printf(m, "%02x", (int)f.handle.f_handle[i]); - - return ret; + seq_printf(m, "%02x", (int)f.handle.f_handle[i]); } #else -static int show_mark_fhandle(struct seq_file *m, struct inode *inode) +static void show_mark_fhandle(struct seq_file *m, struct inode *inode) { - return 0; } #endif #ifdef CONFIG_INOTIFY_USER -static int inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) +static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) { struct inotify_inode_mark *inode_mark; struct inode *inode; - int ret = 0; if (!(mark->flags & (FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_INODE))) - return 0; + return; inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); inode = igrab(mark->i.inode); if (inode) { - ret = seq_printf(m, "inotify wd:%x ino:%lx sdev:%x " - "mask:%x ignored_mask:%x ", - inode_mark->wd, inode->i_ino, - inode->i_sb->s_dev, - mark->mask, mark->ignored_mask); - ret |= show_mark_fhandle(m, inode); - ret |= seq_putc(m, '\n'); + seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ", + inode_mark->wd, inode->i_ino, inode->i_sb->s_dev, + mark->mask, mark->ignored_mask); + show_mark_fhandle(m, inode); + seq_putc(m, '\n'); iput(inode); } - - return ret; } -int inotify_show_fdinfo(struct seq_file *m, struct file *f) +void inotify_show_fdinfo(struct seq_file *m, struct file *f) { - return show_fdinfo(m, f, inotify_fdinfo); + show_fdinfo(m, f, inotify_fdinfo); } #endif /* CONFIG_INOTIFY_USER */ #ifdef CONFIG_FANOTIFY -static int fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) +static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) { unsigned int mflags = 0; struct inode *inode; - int ret = 0; if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) - return 0; + return; if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) mflags |= FAN_MARK_IGNORED_SURV_MODIFY; @@ -124,26 +114,22 @@ static int fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { inode = igrab(mark->i.inode); if (!inode) - goto out; - ret = seq_printf(m, "fanotify ino:%lx sdev:%x " - "mflags:%x mask:%x ignored_mask:%x ", - inode->i_ino, inode->i_sb->s_dev, - mflags, mark->mask, mark->ignored_mask); - ret |= show_mark_fhandle(m, inode); - ret |= seq_putc(m, '\n'); + return; + seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ", + inode->i_ino, inode->i_sb->s_dev, + mflags, mark->mask, mark->ignored_mask); + show_mark_fhandle(m, inode); + seq_putc(m, '\n'); iput(inode); } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) { struct mount *mnt = real_mount(mark->m.mnt); - ret = seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x " - "ignored_mask:%x\n", mnt->mnt_id, mflags, - mark->mask, mark->ignored_mask); + seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", + mnt->mnt_id, mflags, mark->mask, mark->ignored_mask); } -out: - return ret; } -int fanotify_show_fdinfo(struct seq_file *m, struct file *f) +void fanotify_show_fdinfo(struct seq_file *m, struct file *f) { struct fsnotify_group *group = f->private_data; unsigned int flags = 0; @@ -169,7 +155,7 @@ int fanotify_show_fdinfo(struct seq_file *m, struct file *f) seq_printf(m, "fanotify flags:%x event-flags:%x\n", flags, group->fanotify_data.f_flags); - return show_fdinfo(m, f, fanotify_fdinfo); + show_fdinfo(m, f, fanotify_fdinfo); } #endif /* CONFIG_FANOTIFY */ diff --git a/fs/notify/fdinfo.h b/fs/notify/fdinfo.h index 556afda990e..9664c4904d6 100644 --- a/fs/notify/fdinfo.h +++ b/fs/notify/fdinfo.h @@ -10,11 +10,11 @@ struct file; #ifdef CONFIG_PROC_FS #ifdef CONFIG_INOTIFY_USER -extern int inotify_show_fdinfo(struct seq_file *m, struct file *f); +void inotify_show_fdinfo(struct seq_file *m, struct file *f); #endif #ifdef CONFIG_FANOTIFY -extern int fanotify_show_fdinfo(struct seq_file *m, struct file *f); +void fanotify_show_fdinfo(struct seq_file *m, struct file *f); #endif #else /* CONFIG_PROC_FS */ diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 9d3e9c50066..41e39102743 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -63,14 +63,14 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ - hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { struct dentry *child; /* run all of the children of the original inode and fix their * d_flags to indicate parental interest (their parent is the * original inode) */ spin_lock(&alias->d_lock); - list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &alias->d_subdirs, d_child) { if (!child->d_inode) continue; @@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, &fsnotify_mark_srcu); } + /* + * We need to merge inode & vfsmount mark lists so that inode mark + * ignore masks are properly reflected for mount mark notifications. + * That's why this traversal is so complicated... + */ while (inode_node || vfsmount_node) { - inode_group = vfsmount_group = NULL; + inode_group = NULL; + inode_mark = NULL; + vfsmount_group = NULL; + vfsmount_mark = NULL; if (inode_node) { inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), @@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, vfsmount_group = vfsmount_mark->group; } - if (inode_group > vfsmount_group) { - /* handle inode */ - ret = send_to_group(to_tell, inode_mark, NULL, mask, - data, data_is, cookie, file_name); - /* we didn't use the vfsmount_mark */ - vfsmount_group = NULL; - } else if (vfsmount_group > inode_group) { - ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, - data, data_is, cookie, file_name); - inode_group = NULL; - } else { - ret = send_to_group(to_tell, inode_mark, vfsmount_mark, - mask, data, data_is, cookie, - file_name); + if (inode_group && vfsmount_group) { + int cmp = fsnotify_compare_groups(inode_group, + vfsmount_group); + if (cmp > 0) { + inode_group = NULL; + inode_mark = NULL; + } else if (cmp < 0) { + vfsmount_group = NULL; + vfsmount_mark = NULL; + } } + ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, + data, data_is, cookie, file_name); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 9c0898c4cfe..3b68b0ae0a9 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group); /* protects reads of inode and vfsmount marks list */ extern struct srcu_struct fsnotify_mark_srcu; +/* compare two groups for sorting of marks lists */ +extern int fsnotify_compare_groups(struct fsnotify_group *a, + struct fsnotify_group *b); + extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, __u32 mask); /* add a mark to an inode */ diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index e8497144b32..dfbf5447eea 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, { struct fsnotify_mark *lmark, *last = NULL; int ret = 0; + int cmp; mark->flags |= FSNOTIFY_MARK_FLAG_INODE; @@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group->priority < lmark->group->priority) - continue; - - if ((mark->group->priority == lmark->group->priority) && - (mark->group < lmark->group)) + cmp = fsnotify_compare_groups(lmark->group, mark->group); + if (cmp < 0) continue; hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index daf76652fe5..283aa312d74 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -227,14 +227,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf, struct fsnotify_event *kevent; char __user *start; int ret; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); start = buf; group = file->private_data; + add_wait_queue(&group->notification_waitq, &wait); while (1) { - prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE); - mutex_lock(&group->notification_mutex); kevent = get_one_event(group, count); mutex_unlock(&group->notification_mutex); @@ -264,10 +263,10 @@ static ssize_t inotify_read(struct file *file, char __user *buf, if (start != buf) break; - schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } + remove_wait_queue(&group->notification_waitq, &wait); - finish_wait(&group->notification_waitq, &wait); if (start != buf && ret != -EFAULT) ret = buf - start; return ret; diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d90deaa08e7..34c38fabf51 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -210,6 +210,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas } /* + * Sorting function for lists of fsnotify marks. + * + * Fanotify supports different notification classes (reflected as priority of + * notification group). Events shall be passed to notification groups in + * decreasing priority order. To achieve this marks in notification lists for + * inodes and vfsmounts are sorted so that priorities of corresponding groups + * are descending. + * + * Furthermore correct handling of the ignore mask requires processing inode + * and vfsmount marks of each group together. Using the group address as + * further sort criterion provides a unique sorting order and thus we can + * merge inode and vfsmount lists of marks in linear time and find groups + * present in both lists. + * + * A return value of 1 signifies that b has priority over a. + * A return value of 0 signifies that the two marks have to be handled together. + * A return value of -1 signifies that a has priority over b. + */ +int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) +{ + if (a == b) + return 0; + if (!a) + return 1; + if (!b) + return -1; + if (a->priority < b->priority) + return 1; + if (a->priority > b->priority) + return -1; + if (a < b) + return 1; + return -1; +} + +/* * Attach an initialized mark to a given group and fs object. * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group. diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index ac851e8376b..faefa72a11e 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, struct mount *m = real_mount(mnt); struct fsnotify_mark *lmark, *last = NULL; int ret = 0; + int cmp; mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; @@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group->priority < lmark->group->priority) - continue; - - if ((mark->group->priority == lmark->group->priority) && - (mark->group < lmark->group)) + cmp = fsnotify_compare_groups(lmark->group, mark->group); + if (cmp < 0) continue; hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 436f36037e0..b3973c2fd19 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -111,8 +111,8 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, unsigned long dent_ino; int uname_len; - ntfs_debug("Looking up %s in directory inode 0x%lx.", - dent->d_name.name, dir_ino->i_ino); + ntfs_debug("Looking up %pd in directory inode 0x%lx.", + dent, dir_ino->i_ino); /* Convert the name of the dentry to Unicode. */ uname_len = ntfs_nlstoucs(vol, dent->d_name.name, dent->d_name.len, &uname); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1ef547e4937..d9f222987f2 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1251,7 +1251,7 @@ static int ocfs2_write_cluster(struct address_space *mapping, ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL, NULL); if (ret < 0) { - ocfs2_error(inode->i_sb, "Corrupting extend for inode %llu, " + mlog(ML_ERROR, "Get physical blkno failed for inode %llu, " "at logical block %llu", (unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)v_blkno); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index eb9d48746ab..16eff45727e 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1127,10 +1127,10 @@ static int o2hb_thread(void *data) elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); mlog(ML_HEARTBEAT, - "start = %lu.%lu, end = %lu.%lu, msec = %u\n", + "start = %lu.%lu, end = %lu.%lu, msec = %u, ret = %d\n", before_hb.tv_sec, (unsigned long) before_hb.tv_usec, after_hb.tv_sec, (unsigned long) after_hb.tv_usec, - elapsed_msec); + elapsed_msec, ret); if (!kthread_should_stop() && elapsed_msec < reg->hr_timeout_ms) { diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 97de0fbd9f7..2e355e0f833 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -925,7 +925,7 @@ static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec, size_t veclen, size_t total) { int ret; - struct msghdr msg; + struct msghdr msg = {.msg_flags = 0,}; if (sock == NULL) { ret = -EINVAL; @@ -1736,7 +1736,7 @@ static void o2net_connect_expired(struct work_struct *work) o2net_idle_timeout() / 1000, o2net_idle_timeout() % 1000); - o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); + o2net_set_nn_state(nn, NULL, 0, 0); } spin_unlock(&nn->nn_lock); } diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index e2e05a106be..4fda7a5f308 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -172,7 +172,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { trace_ocfs2_find_local_alias(dentry->d_name.len, @@ -251,8 +251,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, if (dl) { mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, - " \"%.*s\": old parent: %llu, new: %llu\n", - dentry->d_name.len, dentry->d_name.name, + " \"%pd\": old parent: %llu, new: %llu\n", + dentry, (unsigned long long)parent_blkno, (unsigned long long)dl->dl_parent_blkno); return 0; @@ -277,8 +277,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno); mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, - " \"%.*s\": old parent: %llu, new: %llu\n", - dentry->d_name.len, dentry->d_name.name, + " \"%pd\": old parent: %llu, new: %llu\n", + dentry, (unsigned long long)parent_blkno, (unsigned long long)dl->dl_parent_blkno); @@ -406,17 +406,15 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode) if (inode) ino = (unsigned long long)OCFS2_I(inode)->ip_blkno; mlog(ML_ERROR, "Dentry is missing cluster lock. " - "inode: %llu, d_flags: 0x%x, d_name: %.*s\n", - ino, dentry->d_flags, dentry->d_name.len, - dentry->d_name.name); + "inode: %llu, d_flags: 0x%x, d_name: %pd\n", + ino, dentry->d_flags, dentry); } goto out; } - mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n", - dentry->d_name.len, dentry->d_name.name, - dl->dl_count); + mlog_bug_on_msg(dl->dl_count == 0, "dentry: %pd, count: %u\n", + dentry, dl->dl_count); ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl); diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 0717662b4ae..79d56dc981b 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -744,7 +744,7 @@ restart: if (ocfs2_read_dir_block(dir, block, &bh, 0)) { /* read error, skip block & hope for the best. * ocfs2_read_dir_block() has released the bh. */ - ocfs2_error(dir->i_sb, "reading directory %llu, " + mlog(ML_ERROR, "reading directory %llu, " "offset %lu\n", (unsigned long long)OCFS2_I(dir)->ip_blkno, block); @@ -2073,10 +2073,12 @@ struct ocfs2_empty_dir_priv { unsigned seen_other; unsigned dx_dir; }; -static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, - loff_t pos, u64 ino, unsigned type) +static int ocfs2_empty_dir_filldir(struct dir_context *ctx, const char *name, + int name_len, loff_t pos, u64 ino, + unsigned type) { - struct ocfs2_empty_dir_priv *p = priv; + struct ocfs2_empty_dir_priv *p = + container_of(ctx, struct ocfs2_empty_dir_priv, ctx); /* * Check the positions of "." and ".." records to be sure diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 02d315fef43..50a59d2337b 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -877,7 +877,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, * to be put in someone's domain map. * Also, explicitly disallow joining at certain troublesome * times (ie. during recovery). */ - if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { + if (dlm->dlm_state != DLM_CTXT_LEAVING) { int bit = query->node_idx; spin_lock(&dlm->spinlock); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 215e41abf10..3689b359204 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1460,6 +1460,18 @@ way_up_top: /* take care of the easy cases up front */ spin_lock(&res->spinlock); + + /* + * Right after dlm spinlock was released, dlm_thread could have + * purged the lockres. Check if lockres got unhashed. If so + * start over. + */ + if (hlist_unhashed(&res->hash_node)) { + spin_unlock(&res->spinlock); + dlm_lockres_put(res); + goto way_up_top; + } + if (res->state & (DLM_LOCK_RES_RECOVERING| DLM_LOCK_RES_MIGRATING)) { spin_unlock(&res->spinlock); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 3365839d297..79b5af5e6a7 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1656,14 +1656,18 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, req.namelen = res->lockname.len; memcpy(req.name, res->lockname.name, res->lockname.len); +resend: ret = o2net_send_message(DLM_MASTER_REQUERY_MSG, dlm->key, &req, sizeof(req), nodenum, &status); - /* XXX: negative status not handled properly here. */ if (ret < 0) mlog(ML_ERROR, "Error %d when sending message %u (key " "0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG, dlm->key, nodenum); - else { + else if (status == -ENOMEM) { + mlog_errno(status); + msleep(50); + goto resend; + } else { BUG_ON(status < 0); BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN); *real_master = (u8) (status & 0xff); @@ -1705,9 +1709,13 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, int ret = dlm_dispatch_assert_master(dlm, res, 0, 0, flags); if (ret < 0) { - mlog_errno(-ENOMEM); - /* retry!? */ - BUG(); + mlog_errno(ret); + spin_unlock(&res->spinlock); + dlm_lockres_put(res); + spin_unlock(&dlm->spinlock); + dlm_put(dlm); + /* sender will take care of this and retry */ + return ret; } else __dlm_lockres_grab_inflight_worker(dlm, res); spin_unlock(&res->spinlock); diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 09b7d9dac71..57c40e34f56 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -565,8 +565,8 @@ static int dlmfs_unlink(struct inode *dir, * to acquire a lock, this basically destroys our lockres. */ status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres); if (status < 0) { - mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n", - dentry->d_name.len, dentry->d_name.name, status); + mlog(ML_ERROR, "unlink %pd, error %d from destroy\n", + dentry, status); goto bail; } status = simple_unlink(dir, dentry); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 21262f2b165..1c423af04c6 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -861,8 +861,13 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing * the OCFS2_LOCK_BUSY flag to prevent the dc thread from * downconverting the lock before the upconvert has fully completed. + * Do not prevent the dc thread from downconverting if NONBLOCK lock + * had already returned. */ - lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); + if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED)) + lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); + else + lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED); lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); } @@ -1324,13 +1329,12 @@ static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, /* returns 0 if the mw that was removed was already satisfied, -EBUSY * if the mask still hadn't reached its goal */ -static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, +static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, struct ocfs2_mask_waiter *mw) { - unsigned long flags; int ret = 0; - spin_lock_irqsave(&lockres->l_lock, flags); + assert_spin_locked(&lockres->l_lock); if (!list_empty(&mw->mw_item)) { if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) ret = -EBUSY; @@ -1338,6 +1342,18 @@ static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, list_del_init(&mw->mw_item); init_completion(&mw->mw_complete); } + + return ret; +} + +static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, + struct ocfs2_mask_waiter *mw) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&lockres->l_lock, flags); + ret = __lockres_remove_mask_waiter(lockres, mw); spin_unlock_irqrestore(&lockres->l_lock, flags); return ret; @@ -1373,6 +1389,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, unsigned long flags; unsigned int gen; int noqueue_attempted = 0; + int dlm_locked = 0; ocfs2_init_mask_waiter(&mw); @@ -1481,6 +1498,7 @@ again: ocfs2_recover_from_dlm_error(lockres, 1); goto out; } + dlm_locked = 1; mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n", lockres->l_name); @@ -1514,10 +1532,17 @@ out: if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { wait = 0; - if (lockres_remove_mask_waiter(lockres, &mw)) + spin_lock_irqsave(&lockres->l_lock, flags); + if (__lockres_remove_mask_waiter(lockres, &mw)) { + if (dlm_locked) + lockres_or_flags(lockres, + OCFS2_LOCK_NONBLOCK_FINISHED); + spin_unlock_irqrestore(&lockres->l_lock, flags); ret = -EAGAIN; - else + } else { + spin_unlock_irqrestore(&lockres->l_lock, flags); goto again; + } } if (wait) { ret = ocfs2_wait_for_mask(&mw); @@ -3725,8 +3750,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, break; spin_unlock(&dentry_attach_lock); - mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, - dentry->d_name.name); + mlog(0, "d_delete(%pd);\n", dentry); /* * The following dcache calls may do an diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 324dc93ac89..69fb9f75b08 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2381,9 +2381,7 @@ out_dio: if (ret < 0) written = ret; - if (!ret && ((old_size != i_size_read(inode)) || - (old_clusters != OCFS2_I(inode)->ip_clusters) || - has_refcount)) { + if (!ret) { ret = jbd2_journal_force_commit(osb->journal->j_journal); if (ret < 0) written = ret; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 437de7f768c..c8b25de9efb 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -540,8 +540,7 @@ bail: if (status < 0) make_bad_inode(inode); - if (args && bh) - brelse(bh); + brelse(bh); return status; } diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index a9b76de4604..ca3431ee7f2 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -80,6 +80,8 @@ struct ocfs2_inode_info */ tid_t i_sync_tid; tid_t i_datasync_tid; + + struct dquot *i_dquot[MAXQUOTAS]; }; /* diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4b0c68849b3..4f502382180 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1982,10 +1982,12 @@ struct ocfs2_orphan_filldir_priv { struct ocfs2_super *osb; }; -static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len, - loff_t pos, u64 ino, unsigned type) +static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name, + int name_len, loff_t pos, u64 ino, + unsigned type) { - struct ocfs2_orphan_filldir_priv *p = priv; + struct ocfs2_orphan_filldir_priv *p = + container_of(ctx, struct ocfs2_orphan_filldir_priv, ctx); struct inode *iter; if (name_len == 1 && !strncmp(".", name, 1)) diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 74caffeeee1..56a768d06aa 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -904,9 +904,6 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) struct buffer_head *di_bh = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - if (!inode) - return -ENOENT; - if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) return -EROFS; diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index bbec539230f..7d6b7d09045 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -144,6 +144,12 @@ enum ocfs2_unlock_action { * before the upconvert * has completed */ +#define OCFS2_LOCK_NONBLOCK_FINISHED (0x00001000) /* NONBLOCK cluster + * lock has already + * returned, do not block + * dc thread from + * downconverting */ + struct ocfs2_lock_res_ops; typedef void (*ocfs2_lock_callback)(int status, unsigned long data); diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index a88b2a4fcc8..d5493e361a3 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -306,7 +306,7 @@ int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num, assert_spin_locked(&osb->osb_lock); BUG_ON(slot_num < 0); - BUG_ON(slot_num > osb->max_slots); + BUG_ON(slot_num >= osb->max_slots); if (!si->si_slots[slot_num].sl_valid) return -ENOENT; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 93c85bc745e..83723179e1e 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -143,6 +143,11 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend); static int ocfs2_enable_quotas(struct ocfs2_super *osb); static void ocfs2_disable_quotas(struct ocfs2_super *osb); +static struct dquot **ocfs2_get_dquots(struct inode *inode) +{ + return OCFS2_I(inode)->i_dquot; +} + static const struct super_operations ocfs2_sops = { .statfs = ocfs2_statfs, .alloc_inode = ocfs2_alloc_inode, @@ -155,6 +160,7 @@ static const struct super_operations ocfs2_sops = { .show_options = ocfs2_show_options, .quota_read = ocfs2_quota_read, .quota_write = ocfs2_quota_write, + .get_dquots = ocfs2_get_dquots, }; enum { @@ -563,6 +569,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) oi->i_sync_tid = 0; oi->i_datasync_tid = 0; + memset(&oi->i_dquot, 0, sizeof(oi->i_dquot)); jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); return &oi->vfs_inode; @@ -1622,8 +1629,9 @@ static int __init ocfs2_init(void) ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); if (!ocfs2_debugfs_root) { - status = -EFAULT; + status = -ENOMEM; mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); + goto out4; } ocfs2_set_locking_protocol(); @@ -2073,6 +2081,7 @@ static int ocfs2_initialize_super(struct super_block *sb, sb->s_export_op = &ocfs2_export_ops; sb->s_qcop = &ocfs2_quotactl_ops; sb->dq_op = &ocfs2_quota_operations; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; sb->s_xattr = ocfs2_xattr_handlers; sb->s_time_gran = 1; sb->s_flags |= MS_NOATIME; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 016f01df382..662f8dee149 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1284,7 +1284,7 @@ int ocfs2_xattr_get_nolock(struct inode *inode, return -EOPNOTSUPP; if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) - ret = -ENODATA; + return -ENODATA; xis.inode_bh = xbs.inode_bh = di_bh; di = (struct ocfs2_dinode *)di_bh->b_data; diff --git a/fs/open.c b/fs/open.c index de92c13b58b..b1bf3d542d5 100644 --- a/fs/open.c +++ b/fs/open.c @@ -516,7 +516,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) int err = -EBADF; if (f.file) { - audit_inode(NULL, f.file->f_path.dentry, 0); + audit_file(f.file); err = chmod_common(&f.file->f_path, mode); fdput(f); } @@ -642,7 +642,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) error = mnt_want_write_file(f.file); if (error) goto out_fput; - audit_inode(NULL, f.file->f_path.dentry, 0); + audit_file(f.file); error = chown_common(&f.file->f_path, user, group); mnt_drop_write_file(f.file); out_fput: diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index e6012597687..34355818a2e 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,4 +1,4 @@ -config OVERLAYFS_FS +config OVERLAY_FS tristate "Overlay filesystem support" help An overlay filesystem combines two filesystems - an 'upper' filesystem diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 8f91889480d..900daed3e91 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -2,6 +2,6 @@ # Makefile for the overlay filesystem. # -obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o +obj-$(CONFIG_OVERLAY_FS) += overlay.o -overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o +overlay-objs := super.o inode.o dir.o readdir.o copy_up.o diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 15cd91ad994..8ffc4b980f1 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -284,8 +284,7 @@ out: return ERR_PTR(err); } -static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, - enum ovl_path_type type) +static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) { int err; struct dentry *ret = NULL; @@ -294,8 +293,17 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, err = ovl_check_empty_dir(dentry, &list); if (err) ret = ERR_PTR(err); - else if (type == OVL_PATH_MERGE) - ret = ovl_clear_empty(dentry, &list); + else { + /* + * If no upperdentry then skip clearing whiteouts. + * + * Can race with copy-up, since we don't hold the upperdir + * mutex. Doesn't matter, since copy-up can't create a + * non-empty directory from an empty one. + */ + if (ovl_dentry_upper(dentry)) + ret = ovl_clear_empty(dentry, &list); + } ovl_cache_free(&list); @@ -487,8 +495,7 @@ out: return err; } -static int ovl_remove_and_whiteout(struct dentry *dentry, - enum ovl_path_type type, bool is_dir) +static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) { struct dentry *workdir = ovl_workdir(dentry); struct inode *wdir = workdir->d_inode; @@ -500,7 +507,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, int err; if (is_dir) { - opaquedir = ovl_check_empty_and_clear(dentry, type); + opaquedir = ovl_check_empty_and_clear(dentry); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out; @@ -515,9 +522,10 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, if (IS_ERR(whiteout)) goto out_unlock; - if (type == OVL_PATH_LOWER) { + upper = ovl_dentry_upper(dentry); + if (!upper) { upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); + dentry->d_name.len); err = PTR_ERR(upper); if (IS_ERR(upper)) goto kill_whiteout; @@ -529,7 +537,6 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, } else { int flags = 0; - upper = ovl_dentry_upper(dentry); if (opaquedir) upper = opaquedir; err = -ESTALE; @@ -648,7 +655,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) cap_raise(override_cred->cap_effective, CAP_CHOWN); old_cred = override_creds(override_cred); - err = ovl_remove_and_whiteout(dentry, type, is_dir); + err = ovl_remove_and_whiteout(dentry, is_dir); revert_creds(old_cred); put_cred(override_cred); @@ -781,7 +788,7 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, } if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { - opaquedir = ovl_check_empty_and_clear(new, new_type); + opaquedir = ovl_check_empty_and_clear(new); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) { opaquedir = NULL; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index af2d18c9fce..07d74b24913 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -235,26 +235,36 @@ out: return err; } +static bool ovl_need_xattr_filter(struct dentry *dentry, + enum ovl_path_type type) +{ + return type == OVL_PATH_UPPER && S_ISDIR(dentry->d_inode->i_mode); +} + ssize_t ovl_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { - if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && - ovl_is_private_xattr(name)) + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); + + if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) return -ENODATA; - return vfs_getxattr(ovl_dentry_real(dentry), name, value, size); + return vfs_getxattr(realpath.dentry, name, value, size); } ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); ssize_t res; int off; - res = vfs_listxattr(ovl_dentry_real(dentry), list, size); + res = vfs_listxattr(realpath.dentry, list, size); if (res <= 0 || size == 0) return res; - if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE) + if (!ovl_need_xattr_filter(dentry, type)) return res; /* filter out private xattrs */ @@ -279,17 +289,16 @@ int ovl_removexattr(struct dentry *dentry, const char *name) { int err; struct path realpath; - enum ovl_path_type type; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); err = ovl_want_write(dentry); if (err) goto out; - if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && - ovl_is_private_xattr(name)) + err = -ENODATA; + if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) goto out_drop_write; - type = ovl_path_real(dentry, &realpath); if (type == OVL_PATH_LOWER) { err = vfs_getxattr(realpath.dentry, name, NULL, 0); if (err < 0) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 4e9d7c1fea5..c0205990a9f 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -168,7 +168,7 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) { struct ovl_dir_cache *cache = od->cache; - list_del(&od->cursor.l_node); + list_del_init(&od->cursor.l_node); WARN_ON(cache->refcount <= 0); cache->refcount--; if (!cache->refcount) { @@ -180,10 +180,12 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) } } -static int ovl_fill_merge(void *buf, const char *name, int namelen, - loff_t offset, u64 ino, unsigned int d_type) +static int ovl_fill_merge(struct dir_context *ctx, const char *name, + int namelen, loff_t offset, u64 ino, + unsigned int d_type) { - struct ovl_readdir_data *rdd = buf; + struct ovl_readdir_data *rdd = + container_of(ctx, struct ovl_readdir_data, ctx); rdd->count++; if (!rdd->is_merge) @@ -274,11 +276,11 @@ static int ovl_dir_mark_whiteouts(struct dentry *dir, return 0; } -static inline int ovl_dir_read_merged(struct path *upperpath, - struct path *lowerpath, - struct list_head *list) +static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) { int err; + struct path lowerpath; + struct path upperpath; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_merge, .list = list, @@ -286,25 +288,28 @@ static inline int ovl_dir_read_merged(struct path *upperpath, .is_merge = false, }; - if (upperpath->dentry) { - err = ovl_dir_read(upperpath, &rdd); + ovl_path_lower(dentry, &lowerpath); + ovl_path_upper(dentry, &upperpath); + + if (upperpath.dentry) { + err = ovl_dir_read(&upperpath, &rdd); if (err) goto out; - if (lowerpath->dentry) { - err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd); + if (lowerpath.dentry) { + err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd); if (err) goto out; } } - if (lowerpath->dentry) { + if (lowerpath.dentry) { /* * Insert lowerpath entries before upperpath ones, this allows * offsets to be reasonably constant */ list_add(&rdd.middle, rdd.list); rdd.is_merge = true; - err = ovl_dir_read(lowerpath, &rdd); + err = ovl_dir_read(&lowerpath, &rdd); list_del(&rdd.middle); } out: @@ -329,8 +334,6 @@ static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) { int res; - struct path lowerpath; - struct path upperpath; struct ovl_dir_cache *cache; cache = ovl_dir_cache(dentry); @@ -347,10 +350,7 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) cache->refcount = 1; INIT_LIST_HEAD(&cache->entries); - ovl_path_lower(dentry, &lowerpath); - ovl_path_upper(dentry, &upperpath); - - res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries); + res = ovl_dir_read_merged(dentry, &cache->entries); if (res) { ovl_cache_free(&cache->entries); kfree(cache); @@ -452,10 +452,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, /* * Need to check if we started out being a lower dir, but got copied up */ - if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { + if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) { struct inode *inode = file_inode(file); - realfile =lockless_dereference(od->upperfile); + realfile = lockless_dereference(od->upperfile); if (!realfile) { struct path upperpath; @@ -538,14 +538,9 @@ const struct file_operations ovl_dir_operations = { int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) { int err; - struct path lowerpath; - struct path upperpath; struct ovl_cache_entry *p; - ovl_path_upper(dentry, &upperpath); - ovl_path_lower(dentry, &lowerpath); - - err = ovl_dir_read_merged(&upperpath, &lowerpath, list); + err = ovl_dir_read_merged(dentry, list); if (err) return err; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 08b704cebfc..f16d318b71f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -24,7 +24,7 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Overlay filesystem"); MODULE_LICENSE("GPL"); -#define OVERLAYFS_SUPER_MAGIC 0x794c764f +#define OVERLAYFS_SUPER_MAGIC 0x794c7630 struct ovl_config { char *lowerdir; @@ -84,12 +84,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) { - struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); - /* - * Make sure to order reads to upperdentry wrt ovl_dentry_update() - */ - smp_read_barrier_depends(); - return upperdentry; + return lockless_dereference(oe->__upperdentry); } void ovl_path_upper(struct dentry *dentry, struct path *path) @@ -462,11 +457,34 @@ static const match_table_t ovl_tokens = { {OPT_ERR, NULL} }; +static char *ovl_next_opt(char **s) +{ + char *sbegin = *s; + char *p; + + if (sbegin == NULL) + return NULL; + + for (p = sbegin; *p; p++) { + if (*p == '\\') { + p++; + if (!*p) + break; + } else if (*p == ',') { + *p = '\0'; + *s = p + 1; + return sbegin; + } + } + *s = NULL; + return sbegin; +} + static int ovl_parse_opt(char *opt, struct ovl_config *config) { char *p; - while ((p = strsep(&opt, ",")) != NULL) { + while ((p = ovl_next_opt(&opt)) != NULL) { int token; substring_t args[MAX_OPT_ARGS]; @@ -554,15 +572,34 @@ out_dput: goto out_unlock; } +static void ovl_unescape(char *s) +{ + char *d = s; + + for (;; s++, d++) { + if (*s == '\\') + s++; + *d = *s; + if (!*s) + break; + } +} + static int ovl_mount_dir(const char *name, struct path *path) { int err; + char *tmp = kstrdup(name, GFP_KERNEL); + + if (!tmp) + return -ENOMEM; - err = kern_path(name, LOOKUP_FOLLOW, path); + ovl_unescape(tmp); + err = kern_path(tmp, LOOKUP_FOLLOW, path); if (err) { - pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); + pr_err("overlayfs: failed to resolve '%s': %i\n", tmp, err); err = -EINVAL; } + kfree(tmp); return err; } @@ -776,11 +813,11 @@ static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, static struct file_system_type ovl_fs_type = { .owner = THIS_MODULE, - .name = "overlayfs", + .name = "overlay", .mount = ovl_mount, .kill_sb = kill_anon_super, }; -MODULE_ALIAS_FS("overlayfs"); +MODULE_ALIAS_FS("overlay"); static int __init ovl_init(void) { diff --git a/fs/proc/array.c b/fs/proc/array.c index cd3653e4f35..bd117d065b8 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -157,20 +157,29 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct user_namespace *user_ns = seq_user_ns(m); struct group_info *group_info; int g; - struct fdtable *fdt = NULL; + struct task_struct *tracer; const struct cred *cred; - pid_t ppid, tpid; + pid_t ppid, tpid = 0, tgid, ngid; + unsigned int max_fds = 0; rcu_read_lock(); ppid = pid_alive(p) ? task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; - tpid = 0; - if (pid_alive(p)) { - struct task_struct *tracer = ptrace_parent(p); - if (tracer) - tpid = task_pid_nr_ns(tracer, ns); - } + + tracer = ptrace_parent(p); + if (tracer) + tpid = task_pid_nr_ns(tracer, ns); + + tgid = task_tgid_nr_ns(p, ns); + ngid = task_numa_group_id(p); cred = get_task_cred(p); + + task_lock(p); + if (p->files) + max_fds = files_fdtable(p->files)->max_fds; + task_unlock(p); + rcu_read_unlock(); + seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" @@ -179,12 +188,10 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, "PPid:\t%d\n" "TracerPid:\t%d\n" "Uid:\t%d\t%d\t%d\t%d\n" - "Gid:\t%d\t%d\t%d\t%d\n", + "Gid:\t%d\t%d\t%d\t%d\n" + "FDSize:\t%d\nGroups:\t", get_task_state(p), - task_tgid_nr_ns(p, ns), - task_numa_group_id(p), - pid_nr_ns(pid, ns), - ppid, tpid, + tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid, from_kuid_munged(user_ns, cred->uid), from_kuid_munged(user_ns, cred->euid), from_kuid_munged(user_ns, cred->suid), @@ -192,20 +199,10 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, from_kgid_munged(user_ns, cred->gid), from_kgid_munged(user_ns, cred->egid), from_kgid_munged(user_ns, cred->sgid), - from_kgid_munged(user_ns, cred->fsgid)); - - task_lock(p); - if (p->files) - fdt = files_fdtable(p->files); - seq_printf(m, - "FDSize:\t%d\n" - "Groups:\t", - fdt ? fdt->max_fds : 0); - rcu_read_unlock(); + from_kgid_munged(user_ns, cred->fsgid), + max_fds); group_info = cred->group_info; - task_unlock(p); - for (g = 0; g < group_info->ngroups; g++) seq_printf(m, "%d ", from_kgid_munged(user_ns, GROUP_AT(group_info, g))); diff --git a/fs/proc/base.c b/fs/proc/base.c index 772efa45a45..590aeda5af1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2618,6 +2618,9 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) dput(dentry); } + if (pid == tgid) + return; + name.name = buf; name.len = snprintf(buf, sizeof(buf), "%d", tgid); leader = d_hash_and_lookup(mnt->mnt_root, &name); @@ -2789,7 +2792,7 @@ retry: int proc_pid_readdir(struct file *file, struct dir_context *ctx) { struct tgid_iter iter; - struct pid_namespace *ns = file->f_dentry->d_sb->s_fs_info; + struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info; loff_t pos = ctx->pos; if (pos >= PID_MAX_LIMIT + TGID_OFFSET) @@ -3095,7 +3098,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ - ns = file->f_dentry->d_sb->s_fs_info; + ns = inode->i_sb->s_fs_info; tid = (int)file->f_version; file->f_version = 0; for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); diff --git a/fs/proc/fd.c b/fs/proc/fd.c index e11d7c590bb..8e5ad83b629 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -53,7 +53,8 @@ static int seq_show(struct seq_file *m, void *v) (long long)file->f_pos, f_flags, real_mount(file->f_path.mnt)->mnt_id); if (file->f_op->show_fdinfo) - ret = file->f_op->show_fdinfo(m, file); + file->f_op->show_fdinfo(m, file); + ret = seq_has_overflowed(m); fput(file); } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 317b72641eb..7fea13229f3 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -31,9 +31,73 @@ static DEFINE_SPINLOCK(proc_subdir_lock); static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) { - if (de->namelen != len) - return 0; - return !memcmp(name, de->name, len); + if (len < de->namelen) + return -1; + if (len > de->namelen) + return 1; + + return memcmp(name, de->name, len); +} + +static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) +{ + return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry, + subdir_node); +} + +static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) +{ + return rb_entry_safe(rb_next(&dir->subdir_node), struct proc_dir_entry, + subdir_node); +} + +static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, + const char *name, + unsigned int len) +{ + struct rb_node *node = dir->subdir.rb_node; + + while (node) { + struct proc_dir_entry *de = container_of(node, + struct proc_dir_entry, + subdir_node); + int result = proc_match(len, name, de); + + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return de; + } + return NULL; +} + +static bool pde_subdir_insert(struct proc_dir_entry *dir, + struct proc_dir_entry *de) +{ + struct rb_root *root = &dir->subdir; + struct rb_node **new = &root->rb_node, *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct proc_dir_entry *this = + container_of(*new, struct proc_dir_entry, subdir_node); + int result = proc_match(de->namelen, de->name, this); + + parent = *new; + if (result < 0) + new = &(*new)->rb_left; + else if (result > 0) + new = &(*new)->rb_right; + else + return false; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&de->subdir_node, parent, new); + rb_insert_color(&de->subdir_node, root); + return true; } static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) @@ -92,10 +156,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, break; len = next - cp; - for (de = de->subdir; de ; de = de->next) { - if (proc_match(len, cp, de)) - break; - } + de = pde_subdir_find(de, cp, len); if (!de) { WARN(1, "name '%s'\n", name); return -ENOENT; @@ -183,19 +244,16 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, struct inode *inode; spin_lock(&proc_subdir_lock); - for (de = de->subdir; de ; de = de->next) { - if (de->namelen != dentry->d_name.len) - continue; - if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { - pde_get(de); - spin_unlock(&proc_subdir_lock); - inode = proc_get_inode(dir->i_sb, de); - if (!inode) - return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &simple_dentry_operations); - d_add(dentry, inode); - return NULL; - } + de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len); + if (de) { + pde_get(de); + spin_unlock(&proc_subdir_lock); + inode = proc_get_inode(dir->i_sb, de); + if (!inode) + return ERR_PTR(-ENOMEM); + d_set_d_op(dentry, &simple_dentry_operations); + d_add(dentry, inode); + return NULL; } spin_unlock(&proc_subdir_lock); return ERR_PTR(-ENOENT); @@ -225,7 +283,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, return 0; spin_lock(&proc_subdir_lock); - de = de->subdir; + de = pde_subdir_first(de); i = ctx->pos - 2; for (;;) { if (!de) { @@ -234,7 +292,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, } if (!i) break; - de = de->next; + de = pde_subdir_next(de); i--; } @@ -249,7 +307,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, } spin_lock(&proc_subdir_lock); ctx->pos++; - next = de->next; + next = pde_subdir_next(de); pde_put(de); de = next; } while (de); @@ -286,9 +344,8 @@ static const struct inode_operations proc_dir_inode_operations = { static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) { - struct proc_dir_entry *tmp; int ret; - + ret = proc_alloc_inum(&dp->low_ino); if (ret) return ret; @@ -304,21 +361,21 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp dp->proc_iops = &proc_file_inode_operations; } else { WARN_ON(1); + proc_free_inum(dp->low_ino); return -EINVAL; } spin_lock(&proc_subdir_lock); - - for (tmp = dir->subdir; tmp; tmp = tmp->next) - if (strcmp(tmp->name, dp->name) == 0) { - WARN(1, "proc_dir_entry '%s/%s' already registered\n", - dir->name, dp->name); - break; - } - - dp->next = dir->subdir; dp->parent = dir; - dir->subdir = dp; + if (pde_subdir_insert(dir, dp) == false) { + WARN(1, "proc_dir_entry '%s/%s' already registered\n", + dir->name, dp->name); + spin_unlock(&proc_subdir_lock); + if (S_ISDIR(dp->mode)) + dir->nlink--; + proc_free_inum(dp->low_ino); + return -EEXIST; + } spin_unlock(&proc_subdir_lock); return 0; @@ -354,6 +411,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, ent->namelen = qstr.len; ent->mode = mode; ent->nlink = nlink; + ent->subdir = RB_ROOT; atomic_set(&ent->count, 1); spin_lock_init(&ent->pde_unload_lock); INIT_LIST_HEAD(&ent->pde_openers); @@ -485,7 +543,6 @@ void pde_put(struct proc_dir_entry *pde) */ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) { - struct proc_dir_entry **p; struct proc_dir_entry *de = NULL; const char *fn = name; unsigned int len; @@ -497,14 +554,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) } len = strlen(fn); - for (p = &parent->subdir; *p; p=&(*p)->next ) { - if (proc_match(len, fn, *p)) { - de = *p; - *p = de->next; - de->next = NULL; - break; - } - } + de = pde_subdir_find(parent, fn, len); + if (de) + rb_erase(&de->subdir_node, &parent->subdir); spin_unlock(&proc_subdir_lock); if (!de) { WARN(1, "name '%s'\n", name); @@ -516,16 +568,15 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) if (S_ISDIR(de->mode)) parent->nlink--; de->nlink = 0; - WARN(de->subdir, "%s: removing non-empty directory " - "'%s/%s', leaking at least '%s'\n", __func__, - de->parent->name, de->name, de->subdir->name); + WARN(pde_subdir_first(de), + "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n", + __func__, de->parent->name, de->name, pde_subdir_first(de)->name); pde_put(de); } EXPORT_SYMBOL(remove_proc_entry); int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { - struct proc_dir_entry **p; struct proc_dir_entry *root = NULL, *de, *next; const char *fn = name; unsigned int len; @@ -537,24 +588,18 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) } len = strlen(fn); - for (p = &parent->subdir; *p; p=&(*p)->next ) { - if (proc_match(len, fn, *p)) { - root = *p; - *p = root->next; - root->next = NULL; - break; - } - } + root = pde_subdir_find(parent, fn, len); if (!root) { spin_unlock(&proc_subdir_lock); return -ENOENT; } + rb_erase(&root->subdir_node, &parent->subdir); + de = root; while (1) { - next = de->subdir; + next = pde_subdir_first(de); if (next) { - de->subdir = next->next; - next->next = NULL; + rb_erase(&next->subdir_node, &de->subdir); de = next; continue; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index aa7a0ee182e..7fb1a4869fd 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -24,10 +24,9 @@ struct mempolicy; * tree) of these proc_dir_entries, so that we can dynamically * add new files to /proc. * - * The "next" pointer creates a linked list of one /proc directory, - * while parent/subdir create the directory structure (every - * /proc file has a parent, but "subdir" is NULL for all - * non-directory entries). + * parent/subdir are used for the directory structure (every /proc file has a + * parent, but "subdir" is empty for all non-directory entries). + * subdir_node is used to build the rb tree "subdir" of the parent. */ struct proc_dir_entry { unsigned int low_ino; @@ -38,7 +37,9 @@ struct proc_dir_entry { loff_t size; const struct inode_operations *proc_iops; const struct file_operations *proc_fops; - struct proc_dir_entry *next, *parent, *subdir; + struct proc_dir_entry *parent; + struct rb_root subdir; + struct rb_node subdir_node; void *data; atomic_t count; /* use count */ atomic_t in_use; /* number of callers into module in progress; */ diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index a63af3e0a61..1bde894bc62 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -192,6 +192,7 @@ static __net_init int proc_net_ns_init(struct net *net) if (!netd) goto out; + netd->subdir = RB_ROOT; netd->data = net; netd->nlink = 2; netd->namelen = 3; diff --git a/fs/proc/root.c b/fs/proc/root.c index 094e44d4a6b..e74ac9f1a2c 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -251,6 +251,7 @@ struct proc_dir_entry proc_root = { .proc_iops = &proc_root_inode_operations, .proc_fops = &proc_root_operations, .parent = &proc_root, + .subdir = RB_ROOT, .name = "/proc", }; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4e0388cffe3..246eae84b13 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -447,59 +447,92 @@ struct mem_size_stats { u64 pss; }; +static void smaps_account(struct mem_size_stats *mss, struct page *page, + unsigned long size, bool young, bool dirty) +{ + int mapcount; + + if (PageAnon(page)) + mss->anonymous += size; -static void smaps_pte_entry(pte_t ptent, unsigned long addr, - unsigned long ptent_size, struct mm_walk *walk) + mss->resident += size; + /* Accumulate the size in pages that have been accessed. */ + if (young || PageReferenced(page)) + mss->referenced += size; + mapcount = page_mapcount(page); + if (mapcount >= 2) { + u64 pss_delta; + + if (dirty || PageDirty(page)) + mss->shared_dirty += size; + else + mss->shared_clean += size; + pss_delta = (u64)size << PSS_SHIFT; + do_div(pss_delta, mapcount); + mss->pss += pss_delta; + } else { + if (dirty || PageDirty(page)) + mss->private_dirty += size; + else + mss->private_clean += size; + mss->pss += (u64)size << PSS_SHIFT; + } +} + +static void smaps_pte_entry(pte_t *pte, unsigned long addr, + struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = mss->vma; pgoff_t pgoff = linear_page_index(vma, addr); struct page *page = NULL; - int mapcount; - if (pte_present(ptent)) { - page = vm_normal_page(vma, addr, ptent); - } else if (is_swap_pte(ptent)) { - swp_entry_t swpent = pte_to_swp_entry(ptent); + if (pte_present(*pte)) { + page = vm_normal_page(vma, addr, *pte); + } else if (is_swap_pte(*pte)) { + swp_entry_t swpent = pte_to_swp_entry(*pte); if (!non_swap_entry(swpent)) - mss->swap += ptent_size; + mss->swap += PAGE_SIZE; else if (is_migration_entry(swpent)) page = migration_entry_to_page(swpent); - } else if (pte_file(ptent)) { - if (pte_to_pgoff(ptent) != pgoff) - mss->nonlinear += ptent_size; + } else if (pte_file(*pte)) { + if (pte_to_pgoff(*pte) != pgoff) + mss->nonlinear += PAGE_SIZE; } if (!page) return; - if (PageAnon(page)) - mss->anonymous += ptent_size; - if (page->index != pgoff) - mss->nonlinear += ptent_size; + mss->nonlinear += PAGE_SIZE; - mss->resident += ptent_size; - /* Accumulate the size in pages that have been accessed. */ - if (pte_young(ptent) || PageReferenced(page)) - mss->referenced += ptent_size; - mapcount = page_mapcount(page); - if (mapcount >= 2) { - if (pte_dirty(ptent) || PageDirty(page)) - mss->shared_dirty += ptent_size; - else - mss->shared_clean += ptent_size; - mss->pss += (ptent_size << PSS_SHIFT) / mapcount; - } else { - if (pte_dirty(ptent) || PageDirty(page)) - mss->private_dirty += ptent_size; - else - mss->private_clean += ptent_size; - mss->pss += (ptent_size << PSS_SHIFT); - } + smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte)); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, + struct mm_walk *walk) +{ + struct mem_size_stats *mss = walk->private; + struct vm_area_struct *vma = mss->vma; + struct page *page; + + /* FOLL_DUMP will return -EFAULT on huge zero page */ + page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP); + if (IS_ERR_OR_NULL(page)) + return; + mss->anonymous_thp += HPAGE_PMD_SIZE; + smaps_account(mss, page, HPAGE_PMD_SIZE, + pmd_young(*pmd), pmd_dirty(*pmd)); +} +#else +static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, + struct mm_walk *walk) +{ +} +#endif + static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -509,9 +542,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, spinlock_t *ptl; if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { - smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk); + smaps_pmd_entry(pmd, addr, walk); spin_unlock(ptl); - mss->anonymous_thp += HPAGE_PMD_SIZE; return 0; } @@ -524,7 +556,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, */ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) - smaps_pte_entry(*pte, addr, PAGE_SIZE, walk); + smaps_pte_entry(pte, addr, walk); pte_unmap_unlock(pte - 1, ptl); cond_resched(); return 0; @@ -552,6 +584,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_GROWSDOWN)] = "gd", [ilog2(VM_PFNMAP)] = "pf", [ilog2(VM_DENYWRITE)] = "dw", +#ifdef CONFIG_X86_INTEL_MPX + [ilog2(VM_MPX)] = "mp", +#endif [ilog2(VM_LOCKED)] = "lo", [ilog2(VM_IO)] = "io", [ilog2(VM_SEQ_READ)] = "sr", diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index fafb7a02a5d..50416602774 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -36,6 +36,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/uaccess.h> +#include <linux/syslog.h> #include "internal.h" @@ -120,6 +121,18 @@ static const struct seq_operations pstore_ftrace_seq_ops = { .show = pstore_ftrace_seq_show, }; +static int pstore_check_syslog_permissions(struct pstore_private *ps) +{ + switch (ps->type) { + case PSTORE_TYPE_DMESG: + case PSTORE_TYPE_CONSOLE: + return check_syslog_permissions(SYSLOG_ACTION_READ_ALL, + SYSLOG_FROM_READER); + default: + return 0; + } +} + static ssize_t pstore_file_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -138,6 +151,10 @@ static int pstore_file_open(struct inode *inode, struct file *file) int err; const struct seq_operations *sops = NULL; + err = pstore_check_syslog_permissions(ps); + if (err) + return err; + if (ps->type == PSTORE_TYPE_FTRACE) sops = &pstore_ftrace_seq_ops; @@ -174,6 +191,11 @@ static const struct file_operations pstore_file_operations = { static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = dentry->d_inode->i_private; + int err; + + err = pstore_check_syslog_permissions(p); + if (err) + return err; if (p->psi->erase) p->psi->erase(p->type, p->id, p->count, diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 3b5744306ed..ec881b31270 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -135,25 +135,27 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max, return prz; } -static void ramoops_read_kmsg_hdr(char *buffer, struct timespec *time, +static int ramoops_read_kmsg_hdr(char *buffer, struct timespec *time, bool *compressed) { char data_type; + int header_length = 0; - if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu-%c\n", - &time->tv_sec, &time->tv_nsec, &data_type) == 3) { + if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu-%c\n%n", &time->tv_sec, + &time->tv_nsec, &data_type, &header_length) == 3) { if (data_type == 'C') *compressed = true; else *compressed = false; - } else if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu\n", - &time->tv_sec, &time->tv_nsec) == 2) { + } else if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu\n%n", + &time->tv_sec, &time->tv_nsec, &header_length) == 2) { *compressed = false; } else { time->tv_sec = 0; time->tv_nsec = 0; *compressed = false; } + return header_length; } static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, @@ -165,6 +167,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, ssize_t ecc_notice_size; struct ramoops_context *cxt = psi->data; struct persistent_ram_zone *prz; + int header_length; prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt, cxt->max_dump_cnt, id, type, @@ -178,7 +181,13 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, if (!prz) return 0; + if (!persistent_ram_old(prz)) + return 0; + size = persistent_ram_old_size(prz); + header_length = ramoops_read_kmsg_hdr(persistent_ram_old(prz), time, + compressed); + size -= header_length; /* ECC correction notice */ ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0); @@ -187,8 +196,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, if (*buf == NULL) return -ENOMEM; - memcpy(*buf, persistent_ram_old(prz), size); - ramoops_read_kmsg_hdr(*buf, time, compressed); + memcpy(*buf, (char *)persistent_ram_old(prz) + header_length, size); persistent_ram_ecc_string(prz, *buf + size, ecc_notice_size + 1); return size + ecc_notice_size; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 6b4527216a7..8f0acef3d18 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -893,6 +893,11 @@ out: } EXPORT_SYMBOL(dqget); +static inline struct dquot **i_dquot(struct inode *inode) +{ + return inode->i_sb->s_op->get_dquots(inode); +} + static int dqinit_needed(struct inode *inode, int type) { int cnt; @@ -900,9 +905,9 @@ static int dqinit_needed(struct inode *inode, int type) if (IS_NOQUOTA(inode)) return 0; if (type != -1) - return !inode->i_dquot[type]; + return !i_dquot(inode)[type]; for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if (!inode->i_dquot[cnt]) + if (!i_dquot(inode)[cnt]) return 1; return 0; } @@ -965,9 +970,9 @@ static void add_dquot_ref(struct super_block *sb, int type) static void remove_inode_dquot_ref(struct inode *inode, int type, struct list_head *tofree_head) { - struct dquot *dquot = inode->i_dquot[type]; + struct dquot *dquot = i_dquot(inode)[type]; - inode->i_dquot[type] = NULL; + i_dquot(inode)[type] = NULL; if (!dquot) return; @@ -1402,7 +1407,7 @@ static void __dquot_initialize(struct inode *inode, int type) * we check it without locking here to avoid unnecessary * dqget()/dqput() calls. */ - if (inode->i_dquot[cnt]) + if (i_dquot(inode)[cnt]) continue; init_needed = 1; @@ -1433,8 +1438,8 @@ static void __dquot_initialize(struct inode *inode, int type) /* We could race with quotaon or dqget() could have failed */ if (!got[cnt]) continue; - if (!inode->i_dquot[cnt]) { - inode->i_dquot[cnt] = got[cnt]; + if (!i_dquot(inode)[cnt]) { + i_dquot(inode)[cnt] = got[cnt]; got[cnt] = NULL; /* * Make quota reservation system happy if someone @@ -1442,7 +1447,7 @@ static void __dquot_initialize(struct inode *inode, int type) */ rsv = inode_get_rsv_space(inode); if (unlikely(rsv)) - dquot_resv_space(inode->i_dquot[cnt], rsv); + dquot_resv_space(i_dquot(inode)[cnt], rsv); } } out_err: @@ -1472,8 +1477,8 @@ static void __dquot_drop(struct inode *inode) spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - put[cnt] = inode->i_dquot[cnt]; - inode->i_dquot[cnt] = NULL; + put[cnt] = i_dquot(inode)[cnt]; + i_dquot(inode)[cnt] = NULL; } spin_unlock(&dq_data_lock); dqput_all(put); @@ -1494,7 +1499,7 @@ void dquot_drop(struct inode *inode) * add quota pointers back anyway. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (inode->i_dquot[cnt]) + if (i_dquot(inode)[cnt]) break; } @@ -1595,7 +1600,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; - struct dquot **dquots = inode->i_dquot; + struct dquot **dquots = i_dquot(inode); int reserve = flags & DQUOT_SPACE_RESERVE; if (!dquot_active(inode)) { @@ -1643,11 +1648,11 @@ EXPORT_SYMBOL(__dquot_alloc_space); /* * This operation can block, but only after everything is updated */ -int dquot_alloc_inode(const struct inode *inode) +int dquot_alloc_inode(struct inode *inode) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; - struct dquot * const *dquots = inode->i_dquot; + struct dquot * const *dquots = i_dquot(inode); if (!dquot_active(inode)) return 0; @@ -1696,14 +1701,14 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (inode->i_dquot[cnt]) - dquot_claim_reserved_space(inode->i_dquot[cnt], + if (i_dquot(inode)[cnt]) + dquot_claim_reserved_space(i_dquot(inode)[cnt], number); } /* Update inode bytes */ inode_claim_rsv_space(inode, number); spin_unlock(&dq_data_lock); - mark_all_dquot_dirty(inode->i_dquot); + mark_all_dquot_dirty(i_dquot(inode)); srcu_read_unlock(&dquot_srcu, index); return 0; } @@ -1725,14 +1730,14 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (inode->i_dquot[cnt]) - dquot_reclaim_reserved_space(inode->i_dquot[cnt], + if (i_dquot(inode)[cnt]) + dquot_reclaim_reserved_space(i_dquot(inode)[cnt], number); } /* Update inode bytes */ inode_reclaim_rsv_space(inode, number); spin_unlock(&dq_data_lock); - mark_all_dquot_dirty(inode->i_dquot); + mark_all_dquot_dirty(i_dquot(inode)); srcu_read_unlock(&dquot_srcu, index); return; } @@ -1745,7 +1750,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; - struct dquot **dquots = inode->i_dquot; + struct dquot **dquots = i_dquot(inode); int reserve = flags & DQUOT_SPACE_RESERVE, index; if (!dquot_active(inode)) { @@ -1784,11 +1789,11 @@ EXPORT_SYMBOL(__dquot_free_space); /* * This operation can block, but only after everything is updated */ -void dquot_free_inode(const struct inode *inode) +void dquot_free_inode(struct inode *inode) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; - struct dquot * const *dquots = inode->i_dquot; + struct dquot * const *dquots = i_dquot(inode); int index; if (!dquot_active(inode)) @@ -1865,7 +1870,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) if (!sb_has_quota_active(inode->i_sb, cnt)) continue; is_valid[cnt] = 1; - transfer_from[cnt] = inode->i_dquot[cnt]; + transfer_from[cnt] = i_dquot(inode)[cnt]; ret = check_idq(transfer_to[cnt], 1, &warn_to[cnt]); if (ret) goto over_quota; @@ -1901,7 +1906,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) dquot_incr_space(transfer_to[cnt], cur_space); dquot_resv_space(transfer_to[cnt], rsv_space); - inode->i_dquot[cnt] = transfer_to[cnt]; + i_dquot(inode)[cnt] = transfer_to[cnt]; } spin_unlock(&dq_data_lock); @@ -2743,8 +2748,8 @@ static int __init dquot_init(void) for (i = 0; i < nr_hash; i++) INIT_HLIST_HEAD(dquot_hash + i); - printk("Dquot-cache hash table entries: %ld (order %ld, %ld bytes)\n", - nr_hash, order, (PAGE_SIZE << order)); + pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," + " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); register_shrinker(&dqcache_shrinker); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 75621649dbd..2aa4151f99d 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -47,8 +47,11 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd, static void quota_sync_one(struct super_block *sb, void *arg) { - if (sb->s_qcop && sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, *(int *)arg); + int type = *(int *)arg; + + if (sb->s_qcop && sb->s_qcop->quota_sync && + (sb->s_quota_types & (1 << type))) + sb->s_qcop->quota_sync(sb, type); } static int quota_sync_all(int type) @@ -297,8 +300,14 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS)) return -EINVAL; + /* + * Quota not supported on this fs? Check this before s_quota_types + * since they needn't be set if quota is not supported at all. + */ if (!sb->s_qcop) return -ENOSYS; + if (!(sb->s_quota_types & (1 << type))) + return -EINVAL; ret = check_quotactl_permission(sb, type, cmd, id); if (ret < 0) diff --git a/fs/readdir.c b/fs/readdir.c index 33fd92208cb..ced679179ca 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -74,10 +74,11 @@ struct readdir_callback { int result; }; -static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, - u64 ino, unsigned int d_type) +static int fillonedir(struct dir_context *ctx, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) { - struct readdir_callback *buf = (struct readdir_callback *) __buf; + struct readdir_callback *buf = + container_of(ctx, struct readdir_callback, ctx); struct old_linux_dirent __user * dirent; unsigned long d_ino; @@ -148,11 +149,12 @@ struct getdents_callback { int error; }; -static int filldir(void * __buf, const char * name, int namlen, loff_t offset, - u64 ino, unsigned int d_type) +static int filldir(struct dir_context *ctx, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) { struct linux_dirent __user * dirent; - struct getdents_callback * buf = (struct getdents_callback *) __buf; + struct getdents_callback *buf = + container_of(ctx, struct getdents_callback, ctx); unsigned long d_ino; int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, sizeof(long)); @@ -232,11 +234,12 @@ struct getdents_callback64 { int error; }; -static int filldir64(void * __buf, const char * name, int namlen, loff_t offset, - u64 ino, unsigned int d_type) +static int filldir64(struct dir_context *ctx, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) { struct linux_dirent64 __user *dirent; - struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; + struct getdents_callback64 *buf = + container_of(ctx, struct getdents_callback64, ctx); int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, sizeof(u64)); diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 1894d96ccb7..bb79cddf0a1 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -97,6 +97,10 @@ struct reiserfs_inode_info { #ifdef CONFIG_REISERFS_FS_XATTR struct rw_semaphore i_xattr_sem; #endif +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; +#endif + struct inode vfs_inode; }; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f1376c92cf7..ea63ab13ef9 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -594,6 +594,10 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb) return NULL; atomic_set(&ei->openers, 0); mutex_init(&ei->tailpack); +#ifdef CONFIG_QUOTA + memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); +#endif + return &ei->vfs_inode; } @@ -750,6 +754,11 @@ static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, size_t, loff_t); static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t, loff_t); + +static struct dquot **reiserfs_get_dquots(struct inode *inode) +{ + return REISERFS_I(inode)->i_dquot; +} #endif static const struct super_operations reiserfs_sops = { @@ -768,6 +777,7 @@ static const struct super_operations reiserfs_sops = { #ifdef CONFIG_QUOTA .quota_read = reiserfs_quota_read, .quota_write = reiserfs_quota_write, + .get_dquots = reiserfs_get_dquots, #endif }; @@ -1633,6 +1643,7 @@ static int read_super_block(struct super_block *s, int offset) #ifdef CONFIG_QUOTA s->s_qcop = &reiserfs_qctl_operations; s->dq_op = &reiserfs_quota_operations; + s->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif /* diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 7c36898af40..04b06146bae 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -188,10 +188,11 @@ struct reiserfs_dentry_buf { }; static int -fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, - u64 ino, unsigned int d_type) +fill_with_dentries(struct dir_context *ctx, const char *name, int namelen, + loff_t offset, u64 ino, unsigned int d_type) { - struct reiserfs_dentry_buf *dbuf = buf; + struct reiserfs_dentry_buf *dbuf = + container_of(ctx, struct reiserfs_dentry_buf, ctx); struct dentry *dentry; WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex)); @@ -209,9 +210,9 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, } else if (!dentry->d_inode) { /* A directory entry exists, but no file? */ reiserfs_error(dentry->d_sb, "xattr-20003", - "Corrupted directory: xattr %s listed but " - "not found for file %s.\n", - dentry->d_name.name, dbuf->xadir->d_name.name); + "Corrupted directory: xattr %pd listed but " + "not found for file %pd.\n", + dentry, dbuf->xadir); dput(dentry); return -EIO; } @@ -824,10 +825,12 @@ struct listxattr_buf { struct dentry *dentry; }; -static int listxattr_filler(void *buf, const char *name, int namelen, - loff_t offset, u64 ino, unsigned int d_type) +static int listxattr_filler(struct dir_context *ctx, const char *name, + int namelen, loff_t offset, u64 ino, + unsigned int d_type) { - struct listxattr_buf *b = (struct listxattr_buf *)buf; + struct listxattr_buf *b = + container_of(ctx, struct listxattr_buf, ctx); size_t size; if (name[0] != '.' || diff --git a/fs/seq_file.c b/fs/seq_file.c index 3857b720cb1..353948ba1c5 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -16,17 +16,6 @@ #include <asm/uaccess.h> #include <asm/page.h> - -/* - * seq_files have a buffer which can may overflow. When this happens a larger - * buffer is reallocated and all the data will be printed again. - * The overflow state is true when m->count == m->size. - */ -static bool seq_overflow(struct seq_file *m) -{ - return m->count == m->size; -} - static void seq_set_overflow(struct seq_file *m) { m->count = m->size; @@ -124,7 +113,7 @@ static int traverse(struct seq_file *m, loff_t offset) error = 0; m->count = 0; } - if (seq_overflow(m)) + if (seq_has_overflowed(m)) goto Eoverflow; if (pos + m->count > offset) { m->from = offset - pos; @@ -267,7 +256,7 @@ Fill: break; } err = m->op->show(m, p); - if (seq_overflow(m) || err) { + if (seq_has_overflowed(m) || err) { m->count = offs; if (likely(err <= 0)) break; diff --git a/fs/signalfd.c b/fs/signalfd.c index 424b7b65321..7e412ad7483 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -230,7 +230,7 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, } #ifdef CONFIG_PROC_FS -static int signalfd_show_fdinfo(struct seq_file *m, struct file *f) +static void signalfd_show_fdinfo(struct seq_file *m, struct file *f) { struct signalfd_ctx *ctx = f->private_data; sigset_t sigmask; @@ -238,8 +238,6 @@ static int signalfd_show_fdinfo(struct seq_file *m, struct file *f) sigmask = ctx->sigmask; signotset(&sigmask); render_sigset_t(m, "sigmask:\t", &sigmask); - - return 0; } #endif diff --git a/fs/sync.c b/fs/sync.c index bdc729d80e5..01d9f18a70b 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -154,7 +154,7 @@ SYSCALL_DEFINE1(syncfs, int, fd) if (!f.file) return -EBADF; - sb = f.file->f_dentry->d_sb; + sb = f.file->f_path.dentry->d_sb; down_read(&sb->s_umount); ret = sync_filesystem(sb); diff --git a/fs/timerfd.c b/fs/timerfd.c index b46ffa94372..b94fa6c3c6e 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -288,7 +288,7 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, } #ifdef CONFIG_PROC_FS -static int timerfd_show(struct seq_file *m, struct file *file) +static void timerfd_show(struct seq_file *m, struct file *file) { struct timerfd_ctx *ctx = file->private_data; struct itimerspec t; @@ -298,18 +298,19 @@ static int timerfd_show(struct seq_file *m, struct file *file) t.it_interval = ktime_to_timespec(ctx->tintv); spin_unlock_irq(&ctx->wqh.lock); - return seq_printf(m, - "clockid: %d\n" - "ticks: %llu\n" - "settime flags: 0%o\n" - "it_value: (%llu, %llu)\n" - "it_interval: (%llu, %llu)\n", - ctx->clockid, (unsigned long long)ctx->ticks, - ctx->settime_flags, - (unsigned long long)t.it_value.tv_sec, - (unsigned long long)t.it_value.tv_nsec, - (unsigned long long)t.it_interval.tv_sec, - (unsigned long long)t.it_interval.tv_nsec); + seq_printf(m, + "clockid: %d\n" + "ticks: %llu\n" + "settime flags: 0%o\n" + "it_value: (%llu, %llu)\n" + "it_interval: (%llu, %llu)\n", + ctx->clockid, + (unsigned long long)ctx->ticks, + ctx->settime_flags, + (unsigned long long)t.it_value.tv_sec, + (unsigned long long)t.it_value.tv_nsec, + (unsigned long long)t.it_interval.tv_sec, + (unsigned long long)t.it_interval.tv_nsec); } #else #define timerfd_show NULL diff --git a/fs/udf/super.c b/fs/udf/super.c index e229315bbf7..3ccb2f11fc7 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -2082,12 +2082,12 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) mutex_init(&sbi->s_alloc_mutex); if (!udf_parse_options((char *)options, &uopt, false)) - goto error_out; + goto parse_options_failure; if (uopt.flags & (1 << UDF_FLAG_UTF8) && uopt.flags & (1 << UDF_FLAG_NLS_MAP)) { udf_err(sb, "utf8 cannot be combined with iocharset\n"); - goto error_out; + goto parse_options_failure; } #ifdef CONFIG_UDF_NLS if ((uopt.flags & (1 << UDF_FLAG_NLS_MAP)) && !uopt.nls_map) { @@ -2237,8 +2237,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) return 0; error_out: - if (sbi->s_vat_inode) - iput(sbi->s_vat_inode); + iput(sbi->s_vat_inode); +parse_options_failure: #ifdef CONFIG_UDF_NLS if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) unload_nls(sbi->s_nls_map); @@ -2291,8 +2291,7 @@ static void udf_put_super(struct super_block *sb) sbi = UDF_SB(sb); - if (sbi->s_vat_inode) - iput(sbi->s_vat_inode); + iput(sbi->s_vat_inode); #ifdef CONFIG_UDF_NLS if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) unload_nls(sbi->s_nls_map); diff --git a/fs/xattr.c b/fs/xattr.c index 64e83efb742..4ef698549e3 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -405,16 +405,14 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, const void __user *,value, size_t, size, int, flags) { struct fd f = fdget(fd); - struct dentry *dentry; int error = -EBADF; if (!f.file) return error; - dentry = f.file->f_path.dentry; - audit_inode(NULL, dentry, 0); + audit_file(f.file); error = mnt_want_write_file(f.file); if (!error) { - error = setxattr(dentry, name, value, size, flags); + error = setxattr(f.file->f_path.dentry, name, value, size, flags); mnt_drop_write_file(f.file); } fdput(f); @@ -509,7 +507,7 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, if (!f.file) return error; - audit_inode(NULL, f.file->f_path.dentry, 0); + audit_file(f.file); error = getxattr(f.file->f_path.dentry, name, value, size); fdput(f); return error; @@ -590,7 +588,7 @@ SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) if (!f.file) return error; - audit_inode(NULL, f.file->f_path.dentry, 0); + audit_file(f.file); error = listxattr(f.file->f_path.dentry, list, size); fdput(f); return error; @@ -651,16 +649,14 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) { struct fd f = fdget(fd); - struct dentry *dentry; int error = -EBADF; if (!f.file) return error; - dentry = f.file->f_path.dentry; - audit_inode(NULL, dentry, 0); + audit_file(f.file); error = mnt_want_write_file(f.file); if (!error) { - error = removexattr(dentry, name); + error = removexattr(f.file->f_path.dentry, name); mnt_drop_write_file(f.file); } fdput(f); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 92e8f99a585..281002689d6 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1338,7 +1338,10 @@ xfs_free_file_space( goto out; } - +/* + * Preallocate and zero a range of a file. This mechanism has the allocation + * semantics of fallocate and in addition converts data in the range to zeroes. + */ int xfs_zero_file_space( struct xfs_inode *ip, @@ -1346,65 +1349,30 @@ xfs_zero_file_space( xfs_off_t len) { struct xfs_mount *mp = ip->i_mount; - uint granularity; - xfs_off_t start_boundary; - xfs_off_t end_boundary; + uint blksize; int error; trace_xfs_zero_file_space(ip); - granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); + blksize = 1 << mp->m_sb.sb_blocklog; /* - * Round the range of extents we are going to convert inwards. If the - * offset is aligned, then it doesn't get changed so we zero from the - * start of the block offset points to. + * Punch a hole and prealloc the range. We use hole punch rather than + * unwritten extent conversion for two reasons: + * + * 1.) Hole punch handles partial block zeroing for us. + * + * 2.) If prealloc returns ENOSPC, the file range is still zero-valued + * by virtue of the hole punch. */ - start_boundary = round_up(offset, granularity); - end_boundary = round_down(offset + len, granularity); - - ASSERT(start_boundary >= offset); - ASSERT(end_boundary <= offset + len); - - if (start_boundary < end_boundary - 1) { - /* - * Writeback the range to ensure any inode size updates due to - * appending writes make it to disk (otherwise we could just - * punch out the delalloc blocks). - */ - error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - start_boundary, end_boundary - 1); - if (error) - goto out; - truncate_pagecache_range(VFS_I(ip), start_boundary, - end_boundary - 1); - - /* convert the blocks */ - error = xfs_alloc_file_space(ip, start_boundary, - end_boundary - start_boundary - 1, - XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT); - if (error) - goto out; - - /* We've handled the interior of the range, now for the edges */ - if (start_boundary != offset) { - error = xfs_iozero(ip, offset, start_boundary - offset); - if (error) - goto out; - } - - if (end_boundary != offset + len) - error = xfs_iozero(ip, end_boundary, - offset + len - end_boundary); - - } else { - /* - * It's either a sub-granularity range or the range spanned lies - * partially across two adjacent blocks. - */ - error = xfs_iozero(ip, offset, len); - } + error = xfs_free_file_space(ip, offset, len); + if (error) + goto out; + error = xfs_alloc_file_space(ip, round_down(offset, blksize), + round_up(offset + len, blksize) - + round_down(offset, blksize), + XFS_BMAPI_PREALLOC); out: return error; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f1deb961a29..894924a5129 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -236,8 +236,10 @@ xfs_bulkstat_grab_ichunk( XFS_WANT_CORRUPTED_RETURN(stat == 1); /* Check if the record contains the inode in request */ - if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) - return -EINVAL; + if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { + *icount = 0; + return 0; + } idx = agino - irec->ir_startino + 1; if (idx < XFS_INODES_PER_CHUNK && @@ -262,75 +264,76 @@ xfs_bulkstat_grab_ichunk( #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) +struct xfs_bulkstat_agichunk { + char __user **ac_ubuffer;/* pointer into user's buffer */ + int ac_ubleft; /* bytes left in user's buffer */ + int ac_ubelem; /* spaces used in user's buffer */ +}; + /* * Process inodes in chunk with a pointer to a formatter function * that will iget the inode and fill in the appropriate structure. */ -int +static int xfs_bulkstat_ag_ichunk( struct xfs_mount *mp, xfs_agnumber_t agno, struct xfs_inobt_rec_incore *irbp, bulkstat_one_pf formatter, size_t statstruct_size, - struct xfs_bulkstat_agichunk *acp) + struct xfs_bulkstat_agichunk *acp, + xfs_agino_t *last_agino) { - xfs_ino_t lastino = acp->ac_lastino; char __user **ubufp = acp->ac_ubuffer; - int ubleft = acp->ac_ubleft; - int ubelem = acp->ac_ubelem; - int chunkidx, clustidx; + int chunkidx; int error = 0; - xfs_agino_t agino; + xfs_agino_t agino = irbp->ir_startino; - for (agino = irbp->ir_startino, chunkidx = clustidx = 0; - XFS_BULKSTAT_UBLEFT(ubleft) && - irbp->ir_freecount < XFS_INODES_PER_CHUNK; - chunkidx++, clustidx++, agino++) { - int fmterror; /* bulkstat formatter result */ + for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; + chunkidx++, agino++) { + int fmterror; int ubused; - xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino); - ASSERT(chunkidx < XFS_INODES_PER_CHUNK); + /* inode won't fit in buffer, we are done */ + if (acp->ac_ubleft < statstruct_size) + break; /* Skip if this inode is free */ - if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) { - lastino = ino; + if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) continue; - } - - /* - * Count used inodes as free so we can tell when the - * chunk is used up. - */ - irbp->ir_freecount++; /* Get the inode and fill in a single buffer */ ubused = statstruct_size; - error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror); - if (fmterror == BULKSTAT_RV_NOTHING) { - if (error && error != -ENOENT && error != -EINVAL) { - ubleft = 0; - break; - } - lastino = ino; - continue; - } - if (fmterror == BULKSTAT_RV_GIVEUP) { - ubleft = 0; + error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino), + *ubufp, acp->ac_ubleft, &ubused, &fmterror); + + if (fmterror == BULKSTAT_RV_GIVEUP || + (error && error != -ENOENT && error != -EINVAL)) { + acp->ac_ubleft = 0; ASSERT(error); break; } - if (*ubufp) - *ubufp += ubused; - ubleft -= ubused; - ubelem++; - lastino = ino; + + /* be careful not to leak error if at end of chunk */ + if (fmterror == BULKSTAT_RV_NOTHING || error) { + error = 0; + continue; + } + + *ubufp += ubused; + acp->ac_ubleft -= ubused; + acp->ac_ubelem++; } - acp->ac_lastino = lastino; - acp->ac_ubleft = ubleft; - acp->ac_ubelem = ubelem; + /* + * Post-update *last_agino. At this point, agino will always point one + * inode past the last inode we processed successfully. Hence we + * substract that inode when setting the *last_agino cursor so that we + * return the correct cookie to userspace. On the next bulkstat call, + * the inode under the lastino cookie will be skipped as we have already + * processed it here. + */ + *last_agino = agino - 1; return error; } @@ -353,45 +356,33 @@ xfs_bulkstat( xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ - int end_of_ag; /* set if we've seen the ag end */ - int error; /* error code */ - int fmterror;/* bulkstat formatter result */ - int i; /* loop index */ - int icount; /* count of inodes good in irbuf */ size_t irbsize; /* size of irec buffer in bytes */ - xfs_ino_t ino; /* inode number (filesystem) */ - xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */ xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ - xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ - xfs_ino_t lastino; /* last inode number returned */ int nirbuf; /* size of irbuf */ - int rval; /* return value error code */ - int tmp; /* result value from btree calls */ int ubcount; /* size of user's buffer */ - int ubleft; /* bytes left in user's buffer */ - char __user *ubufp; /* pointer into user's buffer */ - int ubelem; /* spaces used in user's buffer */ + struct xfs_bulkstat_agichunk ac; + int error = 0; /* * Get the last inode value, see if there's nothing to do. */ - ino = (xfs_ino_t)*lastinop; - lastino = ino; - agno = XFS_INO_TO_AGNO(mp, ino); - agino = XFS_INO_TO_AGINO(mp, ino); + agno = XFS_INO_TO_AGNO(mp, *lastinop); + agino = XFS_INO_TO_AGINO(mp, *lastinop); if (agno >= mp->m_sb.sb_agcount || - ino != XFS_AGINO_TO_INO(mp, agno, agino)) { + *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) { *done = 1; *ubcountp = 0; return 0; } ubcount = *ubcountp; /* statstruct's */ - ubleft = ubcount * statstruct_size; /* bytes */ - *ubcountp = ubelem = 0; + ac.ac_ubuffer = &ubuffer; + ac.ac_ubleft = ubcount * statstruct_size; /* bytes */; + ac.ac_ubelem = 0; + + *ubcountp = 0; *done = 0; - fmterror = 0; - ubufp = ubuffer; + irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); if (!irbuf) return -ENOMEM; @@ -402,9 +393,13 @@ xfs_bulkstat( * Loop over the allocation groups, starting from the last * inode returned; 0 means start of the allocation group. */ - rval = 0; - while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { - cond_resched(); + while (agno < mp->m_sb.sb_agcount) { + struct xfs_inobt_rec_incore *irbp = irbuf; + struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf; + bool end_of_ag = false; + int icount = 0; + int stat; + error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); if (error) break; @@ -414,10 +409,6 @@ xfs_bulkstat( */ cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO); - irbp = irbuf; - irbufend = irbuf + nirbuf; - end_of_ag = 0; - icount = 0; if (agino > 0) { /* * In the middle of an allocation group, we need to get @@ -427,22 +418,23 @@ xfs_bulkstat( error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); if (error) - break; + goto del_cursor; if (icount) { irbp->ir_startino = r.ir_startino; irbp->ir_freecount = r.ir_freecount; irbp->ir_free = r.ir_free; irbp++; - agino = r.ir_startino + XFS_INODES_PER_CHUNK; } /* Increment to the next record */ - error = xfs_btree_increment(cur, 0, &tmp); + error = xfs_btree_increment(cur, 0, &stat); } else { /* Start of ag. Lookup the first inode chunk */ - error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp); + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat); + } + if (error || stat == 0) { + end_of_ag = true; + goto del_cursor; } - if (error) - break; /* * Loop through inode btree records in this ag, @@ -451,10 +443,10 @@ xfs_bulkstat( while (irbp < irbufend && icount < ubcount) { struct xfs_inobt_rec_incore r; - error = xfs_inobt_get_rec(cur, &r, &i); - if (error || i == 0) { - end_of_ag = 1; - break; + error = xfs_inobt_get_rec(cur, &r, &stat); + if (error || stat == 0) { + end_of_ag = true; + goto del_cursor; } /* @@ -469,77 +461,79 @@ xfs_bulkstat( irbp++; icount += XFS_INODES_PER_CHUNK - r.ir_freecount; } - /* - * Set agino to after this chunk and bump the cursor. - */ - agino = r.ir_startino + XFS_INODES_PER_CHUNK; - error = xfs_btree_increment(cur, 0, &tmp); + error = xfs_btree_increment(cur, 0, &stat); + if (error || stat == 0) { + end_of_ag = true; + goto del_cursor; + } cond_resched(); } + /* - * Drop the btree buffers and the agi buffer. - * We can't hold any of the locks these represent - * when calling iget. + * Drop the btree buffers and the agi buffer as we can't hold any + * of the locks these represent when calling iget. If there is a + * pending error, then we are done. */ +del_cursor: xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); xfs_buf_relse(agbp); + if (error) + break; /* - * Now format all the good inodes into the user's buffer. + * Now format all the good inodes into the user's buffer. The + * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer + * for the next loop iteration. */ irbufend = irbp; for (irbp = irbuf; - irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) { - struct xfs_bulkstat_agichunk ac; - - ac.ac_lastino = lastino; - ac.ac_ubuffer = &ubuffer; - ac.ac_ubleft = ubleft; - ac.ac_ubelem = ubelem; + irbp < irbufend && ac.ac_ubleft >= statstruct_size; + irbp++) { error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, - formatter, statstruct_size, &ac); + formatter, statstruct_size, &ac, + &agino); if (error) - rval = error; - - lastino = ac.ac_lastino; - ubleft = ac.ac_ubleft; - ubelem = ac.ac_ubelem; + break; cond_resched(); } + /* - * Set up for the next loop iteration. + * If we've run out of space or had a formatting error, we + * are now done */ - if (XFS_BULKSTAT_UBLEFT(ubleft)) { - if (end_of_ag) { - agno++; - agino = 0; - } else - agino = XFS_INO_TO_AGINO(mp, lastino); - } else + if (ac.ac_ubleft < statstruct_size || error) break; + + if (end_of_ag) { + agno++; + agino = 0; + } } /* * Done, we're either out of filesystem or space to put the data. */ kmem_free(irbuf); - *ubcountp = ubelem; + *ubcountp = ac.ac_ubelem; + /* - * Found some inodes, return them now and return the error next time. + * We found some inodes, so clear the error status and return them. + * The lastino pointer will point directly at the inode that triggered + * any error that occurred, so on the next call the error will be + * triggered again and propagated to userspace as there will be no + * formatted inodes in the buffer. */ - if (ubelem) - rval = 0; - if (agno >= mp->m_sb.sb_agcount) { - /* - * If we ran out of filesystem, mark lastino as off - * the end of the filesystem, so the next call - * will return immediately. - */ - *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0); + if (ac.ac_ubelem) + error = 0; + + /* + * If we ran out of filesystem, lastino will point off the end of + * the filesystem so the next call will return immediately. + */ + *lastinop = XFS_AGINO_TO_INO(mp, agno, agino); + if (agno >= mp->m_sb.sb_agcount) *done = 1; - } else - *lastinop = (xfs_ino_t)lastino; - return rval; + return error; } int diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index aaed08022eb..6ea8b3912fa 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -30,22 +30,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, int *ubused, int *stat); -struct xfs_bulkstat_agichunk { - xfs_ino_t ac_lastino; /* last inode returned */ - char __user **ac_ubuffer;/* pointer into user's buffer */ - int ac_ubleft; /* bytes left in user's buffer */ - int ac_ubelem; /* spaces used in user's buffer */ -}; - -int -xfs_bulkstat_ag_ichunk( - struct xfs_mount *mp, - xfs_agnumber_t agno, - struct xfs_inobt_rec_incore *irbp, - bulkstat_one_pf formatter, - size_t statstruct_size, - struct xfs_bulkstat_agichunk *acp); - /* * Values for stat return value. */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 9f622feda6a..206b97fd1d8 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1425,6 +1425,7 @@ xfs_fs_fill_super( sb->s_export_op = &xfs_export_operations; #ifdef CONFIG_XFS_QUOTA sb->s_qcop = &xfs_quotactl_operations; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; #endif sb->s_op = &xfs_super_operations; |