From d3ef3d7351ccfbef3e5d926efc5ee332136f40d4 Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Sun, 26 Apr 2009 20:25:54 +1000 Subject: fs: mnt_want_write speedup This patch speeds up lmbench lat_mmap test by about 8%. lat_mmap is set up basically to mmap a 64MB file on tmpfs, fault in its pages, then unmap it. A microbenchmark yes, but it exercises some important paths in the mm. Before: avg = 501.9 std = 14.7773 After: avg = 462.286 std = 5.46106 (50 runs of each, stddev gives a reasonable confidence, but there is quite a bit of variation there still) It does this by removing the complex per-cpu locking and counter-cache and replaces it with a percpu counter in struct vfsmount. This makes the code much simpler, and avoids spinlocks (although the msync is still pretty costly, unfortunately). It results in about 900 bytes smaller code too. It does increase the size of a vfsmount, however. It should also give a speedup on large systems if CPUs are frequently operating on different mounts (because the existing scheme has to operate on an atomic in the struct vfsmount when switching between mounts). But I'm most interested in the single threaded path performance for the moment. [AV: minor cleanup] Cc: Dave Hansen Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/mount.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux/mount.h') diff --git a/include/linux/mount.h b/include/linux/mount.h index 51f55f903af..ac49c1f8e5c 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -30,7 +30,7 @@ struct mnt_namespace; #define MNT_STRICTATIME 0x80 #define MNT_SHRINKABLE 0x100 -#define MNT_IMBALANCED_WRITE_COUNT 0x200 /* just for debugging */ +#define MNT_WRITE_HOLD 0x200 #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ @@ -65,13 +65,22 @@ struct vfsmount { int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; - /* - * This value is not stable unless all of the mnt_writers[] spinlocks - * are held, and all mnt_writer[]s on this mount have 0 as their ->count - */ - atomic_t __mnt_writers; +#ifdef CONFIG_SMP + int *mnt_writers; +#else + int mnt_writers; +#endif }; +static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + return mnt->mnt_writers; +#else + return &mnt->mnt_writers; +#endif +} + static inline struct vfsmount *mntget(struct vfsmount *mnt) { if (mnt) -- cgit v1.2.3-70-g09d2 From 96029c4e09ccbd73a6d0ed2b29e80bf2586ad7ef Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Sun, 26 Apr 2009 20:25:55 +1000 Subject: fs: introduce mnt_clone_write This patch speeds up lmbench lat_mmap test by about another 2% after the first patch. Before: avg = 462.286 std = 5.46106 After: avg = 453.12 std = 9.58257 (50 runs of each, stddev gives a reasonable confidence) It does this by introducing mnt_clone_write, which avoids some heavyweight operations of mnt_want_write if called on a vfsmount which we know already has a write count; and mnt_want_write_file, which can call mnt_clone_write if the file is open for write. After these two patches, mnt_want_write and mnt_drop_write go from 7% on the profile down to 1.3% (including mnt_clone_write). [AV: mnt_want_write_file() should take file alone and derive mnt from it; not only all callers have that form, but that's the only mnt about which we know that it's already held for write if file is opened for write] Cc: Dave Hansen Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/file_table.c | 2 +- fs/inode.c | 2 +- fs/namespace.c | 40 ++++++++++++++++++++++++++++++++++++++++ fs/open.c | 4 ++-- fs/xattr.c | 4 ++-- include/linux/mount.h | 4 ++++ 6 files changed, 50 insertions(+), 6 deletions(-) (limited to 'include/linux/mount.h') diff --git a/fs/file_table.c b/fs/file_table.c index 54018fe4884..3d66dbcebef 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -214,7 +214,7 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, */ if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { file_take_write(file); - error = mnt_want_write(mnt); + error = mnt_clone_write(mnt); WARN_ON(error); } return error; diff --git a/fs/inode.c b/fs/inode.c index ca337014ae2..a88baebf77c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1422,7 +1422,7 @@ void file_update_time(struct file *file) if (IS_NOCMTIME(inode)) return; - err = mnt_want_write(file->f_path.mnt); + err = mnt_want_write_file(file); if (err) return; diff --git a/fs/namespace.c b/fs/namespace.c index 22ae06ad751..120b8a6b99e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -264,6 +264,46 @@ out: } EXPORT_SYMBOL_GPL(mnt_want_write); +/** + * mnt_clone_write - get write access to a mount + * @mnt: the mount on which to take a write + * + * This is effectively like mnt_want_write, except + * it must only be used to take an extra write reference + * on a mountpoint that we already know has a write reference + * on it. This allows some optimisation. + * + * After finished, mnt_drop_write must be called as usual to + * drop the reference. + */ +int mnt_clone_write(struct vfsmount *mnt) +{ + /* superblock may be r/o */ + if (__mnt_is_readonly(mnt)) + return -EROFS; + preempt_disable(); + inc_mnt_writers(mnt); + preempt_enable(); + return 0; +} +EXPORT_SYMBOL_GPL(mnt_clone_write); + +/** + * mnt_want_write_file - get write access to a file's mount + * @file: the file who's mount on which to take a write + * + * This is like mnt_want_write, but it takes a file and can + * do some optimisations if the file is open for write already + */ +int mnt_want_write_file(struct file *file) +{ + if (!(file->f_mode & FMODE_WRITE)) + return mnt_want_write(file->f_path.mnt); + else + return mnt_clone_write(file->f_path.mnt); +} +EXPORT_SYMBOL_GPL(mnt_want_write_file); + /** * mnt_drop_write - give up write access to a mount * @mnt: the mount on which to give up write access diff --git a/fs/open.c b/fs/open.c index bdfbf03615a..7200e23d925 100644 --- a/fs/open.c +++ b/fs/open.c @@ -612,7 +612,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) audit_inode(NULL, dentry); - err = mnt_want_write(file->f_path.mnt); + err = mnt_want_write_file(file); if (err) goto out_putf; mutex_lock(&inode->i_mutex); @@ -761,7 +761,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) if (!file) goto out; - error = mnt_want_write(file->f_path.mnt); + error = mnt_want_write_file(file); if (error) goto out_fput; dentry = file->f_path.dentry; diff --git a/fs/xattr.c b/fs/xattr.c index d51b8f9db92..1c3d0af59dd 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -297,7 +297,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, return error; dentry = f->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write(f->f_path.mnt); + error = mnt_want_write_file(f); if (!error) { error = setxattr(dentry, name, value, size, flags); mnt_drop_write(f->f_path.mnt); @@ -524,7 +524,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) return error; dentry = f->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write(f->f_path.mnt); + error = mnt_want_write_file(f); if (!error) { error = removexattr(dentry, name); mnt_drop_write(f->f_path.mnt); diff --git a/include/linux/mount.h b/include/linux/mount.h index ac49c1f8e5c..5d527536486 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -88,7 +88,11 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt) return mnt; } +struct file; /* forward dec */ + extern int mnt_want_write(struct vfsmount *mnt); +extern int mnt_want_write_file(struct file *file); +extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); extern void mntput_no_expire(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); -- cgit v1.2.3-70-g09d2