From 8c652f96d3852b97a49c331cd0bb02d22f3cb31b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov <oleg@redhat.com> Date: Fri, 24 Apr 2009 01:01:56 +0200 Subject: do_execve() must not clear fs->in_exec if it was set by another thread If do_execve() fails after check_unsafe_exec(), it clears fs->in_exec unconditionally. This is wrong if we race with our sub-thread which also does do_execve: Two threads T1 and T2 and another process P, all share the same ->fs. T1 starts do_execve(BAD_FILE). It calls check_unsafe_exec(), since ->fs is shared, we set LSM_UNSAFE but not ->in_exec. P exits and decrements fs->users. T2 starts do_execve(), calls check_unsafe_exec(), now ->fs is not shared, we set fs->in_exec. T1 continues, open_exec(BAD_FILE) fails, we clear ->in_exec and return to the user-space. T1 does clone(CLONE_FS /* without CLONE_THREAD */). T2 continues without LSM_UNSAFE_SHARE while ->fs is shared with another process. Change check_unsafe_exec() to return res = 1 if we set ->in_exec, and change do_execve() to clear ->in_exec depending on res. When do_execve() suceeds, it is safe to clear ->in_exec unconditionally. It can be set only if we don't share ->fs with another process, and since we already killed all sub-threads either ->in_exec == 0 or we are the only user of this ->fs. Also, we do not need fs->lock to clear fs->in_exec. Signed-off-by: Oleg Nesterov <oleg@redhat.com> Acked-by: Roland McGrath <roland@redhat.com> Acked-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/exec.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 052a961e41a..a2e6989dbc3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1077,9 +1077,11 @@ int check_unsafe_exec(struct linux_binprm *bprm) if (p->fs->users > n_fs) { bprm->unsafe |= LSM_UNSAFE_SHARE; } else { - if (p->fs->in_exec) - res = -EAGAIN; - p->fs->in_exec = 1; + res = -EAGAIN; + if (!p->fs->in_exec) { + p->fs->in_exec = 1; + res = 1; + } } unlock_task_sighand(p, &flags); @@ -1284,6 +1286,7 @@ int do_execve(char * filename, struct linux_binprm *bprm; struct file *file; struct files_struct *displaced; + bool clear_in_exec; int retval; retval = unshare_files(&displaced); @@ -1306,8 +1309,9 @@ int do_execve(char * filename, goto out_unlock; retval = check_unsafe_exec(bprm); - if (retval) + if (retval < 0) goto out_unlock; + clear_in_exec = retval; file = open_exec(filename); retval = PTR_ERR(file); @@ -1355,9 +1359,7 @@ int do_execve(char * filename, goto out; /* execve succeeded */ - write_lock(¤t->fs->lock); current->fs->in_exec = 0; - write_unlock(¤t->fs->lock); current->in_execve = 0; mutex_unlock(¤t->cred_exec_mutex); acct_update_integrals(current); @@ -1377,9 +1379,8 @@ out_file: } out_unmark: - write_lock(¤t->fs->lock); - current->fs->in_exec = 0; - write_unlock(¤t->fs->lock); + if (clear_in_exec) + current->fs->in_exec = 0; out_unlock: current->in_execve = 0; -- cgit v1.2.3-70-g09d2 From 437f7fdb607f32b737e4da9f14bebcfdac2c90c3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov <oleg@redhat.com> Date: Fri, 24 Apr 2009 01:02:45 +0200 Subject: check_unsafe_exec: s/lock_task_sighand/rcu_read_lock/ write_lock(¤t->fs->lock) guarantees we can't wrongly miss LSM_UNSAFE_SHARE, this is what we care about. Use rcu_read_lock() instead of ->siglock to iterate over the sub-threads. We must see all CLONE_THREAD|CLONE_FS threads which didn't pass exit_fs(), it takes fs->lock too. With or without this patch we can miss the freshly cloned thread and set LSM_UNSAFE_SHARE, we don't care. Signed-off-by: Oleg Nesterov <oleg@redhat.com> Acked-by: Roland McGrath <roland@redhat.com> [ Fixed lock/unlock typo - Hugh ] Acked-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/exec.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index a2e6989dbc3..a3a8ce83940 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1060,7 +1060,6 @@ EXPORT_SYMBOL(install_exec_creds); int check_unsafe_exec(struct linux_binprm *bprm) { struct task_struct *p = current, *t; - unsigned long flags; unsigned n_fs; int res = 0; @@ -1068,11 +1067,12 @@ int check_unsafe_exec(struct linux_binprm *bprm) n_fs = 1; write_lock(&p->fs->lock); - lock_task_sighand(p, &flags); + rcu_read_lock(); for (t = next_thread(p); t != p; t = next_thread(t)) { if (t->fs == p->fs) n_fs++; } + rcu_read_unlock(); if (p->fs->users > n_fs) { bprm->unsafe |= LSM_UNSAFE_SHARE; @@ -1083,8 +1083,6 @@ int check_unsafe_exec(struct linux_binprm *bprm) res = 1; } } - - unlock_task_sighand(p, &flags); write_unlock(&p->fs->lock); return res; -- cgit v1.2.3-70-g09d2 From 74641f584da8eccf30becfbb5507ab457187db22 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Date: Thu, 30 Apr 2009 15:08:49 -0700 Subject: alpha: binfmt_aout fix This fixes the problem introduced by commit 3bfacef412 (get rid of special-casing the /sbin/loader on alpha): osf/1 ecoff binary segfaults when binfmt_aout built as module. That happens because aout binary handler gets on the top of the binfmt list due to late registration, and kernel attempts to execute the binary without preparatory work that must be done by binfmt_loader. Fixed by changing the registration order of the default binfmt handlers using list_add_tail() and introducing insert_binfmt() function which places new handler on the top of the binfmt list. This might be generally useful for installing arch-specific frontends for default handlers or just for overriding them. Signed-off-by: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Al Viro <viro@ZenIV.linux.org.uk> Cc: Richard Henderson <rth@twiddle.net Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/alpha/kernel/Makefile | 6 +++++- arch/alpha/kernel/binfmt_loader.c | 2 +- fs/exec.c | 7 ++++--- include/linux/binfmts.h | 14 +++++++++++++- 4 files changed, 23 insertions(+), 6 deletions(-) (limited to 'fs/exec.c') diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index a427538252f..7739a62440a 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -8,7 +8,7 @@ EXTRA_CFLAGS := -Werror -Wno-sign-compare obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ irq_alpha.o signal.o setup.o ptrace.o time.o \ - alpha_ksyms.o systbls.o err_common.o io.o binfmt_loader.o + alpha_ksyms.o systbls.o err_common.o io.o obj-$(CONFIG_VGA_HOSE) += console.o obj-$(CONFIG_SMP) += smp.o @@ -43,6 +43,10 @@ else # Misc support obj-$(CONFIG_ALPHA_SRM) += srmcons.o +ifdef CONFIG_BINFMT_AOUT +obj-y += binfmt_loader.o +endif + # Core logic support obj-$(CONFIG_ALPHA_APECS) += core_apecs.o obj-$(CONFIG_ALPHA_CIA) += core_cia.o diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c index 4a0af906b00..3fcfad41013 100644 --- a/arch/alpha/kernel/binfmt_loader.c +++ b/arch/alpha/kernel/binfmt_loader.c @@ -46,6 +46,6 @@ static struct linux_binfmt loader_format = { static int __init init_loader_binfmt(void) { - return register_binfmt(&loader_format); + return insert_binfmt(&loader_format); } arch_initcall(init_loader_binfmt); diff --git a/fs/exec.c b/fs/exec.c index a3a8ce83940..639177b0eea 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -69,17 +69,18 @@ int suid_dumpable = 0; static LIST_HEAD(formats); static DEFINE_RWLOCK(binfmt_lock); -int register_binfmt(struct linux_binfmt * fmt) +int __register_binfmt(struct linux_binfmt * fmt, int insert) { if (!fmt) return -EINVAL; write_lock(&binfmt_lock); - list_add(&fmt->lh, &formats); + insert ? list_add(&fmt->lh, &formats) : + list_add_tail(&fmt->lh, &formats); write_unlock(&binfmt_lock); return 0; } -EXPORT_SYMBOL(register_binfmt); +EXPORT_SYMBOL(__register_binfmt); void unregister_binfmt(struct linux_binfmt * fmt) { diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 6638b8148de..61ee18c1bdb 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -82,7 +82,19 @@ struct linux_binfmt { int hasvdso; }; -extern int register_binfmt(struct linux_binfmt *); +extern int __register_binfmt(struct linux_binfmt *fmt, int insert); + +/* Registration of default binfmt handlers */ +static inline int register_binfmt(struct linux_binfmt *fmt) +{ + return __register_binfmt(fmt, 0); +} +/* Same as above, but adds a new binfmt at the top of the list */ +static inline int insert_binfmt(struct linux_binfmt *fmt) +{ + return __register_binfmt(fmt, 1); +} + extern void unregister_binfmt(struct linux_binfmt *); extern int prepare_binprm(struct linux_binprm *); -- cgit v1.2.3-70-g09d2 From a44ddbb6d8a8ffe4e34e417048dfdd8f3dd1de4f Mon Sep 17 00:00:00 2001 From: Al Viro <viro@zeniv.linux.org.uk> Date: Mon, 6 Apr 2009 09:38:49 -0400 Subject: Make open_exec() and sys_uselib() use may_open(), instead of duplicating its parts Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> --- fs/exec.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 639177b0eea..41ae8e0de72 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -126,11 +126,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) if (nd.path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - error = inode_permission(nd.path.dentry->d_inode, - MAY_READ | MAY_EXEC | MAY_OPEN); - if (error) - goto exit; - error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN); + error = may_open(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN, 0); if (error) goto exit; @@ -677,10 +673,7 @@ struct file *open_exec(const char *name) if (nd.path.mnt->mnt_flags & MNT_NOEXEC) goto out_path_put; - err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN); - if (err) - goto out_path_put; - err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN); + err = may_open(&nd.path, MAY_EXEC | MAY_OPEN, 0); if (err) goto out_path_put; -- cgit v1.2.3-70-g09d2 From 6e8341a11eb21826b7192d0bb88cb5b44900a9af Mon Sep 17 00:00:00 2001 From: Al Viro <viro@zeniv.linux.org.uk> Date: Mon, 6 Apr 2009 11:16:22 -0400 Subject: Switch open_exec() and sys_uselib() to do_open_filp() ... and make path_lookup_open() static Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> --- fs/exec.c | 72 ++++++++++++++++++--------------------------------- fs/namei.c | 13 +++++----- fs/open.c | 2 +- include/linux/fs.h | 2 +- include/linux/namei.h | 1 - 5 files changed, 34 insertions(+), 56 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 41ae8e0de72..895823d0149 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -105,36 +105,28 @@ static inline void put_binfmt(struct linux_binfmt * fmt) SYSCALL_DEFINE1(uselib, const char __user *, library) { struct file *file; - struct nameidata nd; char *tmp = getname(library); int error = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - error = path_lookup_open(AT_FDCWD, tmp, - LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - putname(tmp); - } - if (error) + if (IS_ERR(tmp)) + goto out; + + file = do_filp_open(AT_FDCWD, tmp, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_READ | MAY_EXEC | MAY_OPEN); + putname(tmp); + error = PTR_ERR(file); + if (IS_ERR(file)) goto out; error = -EINVAL; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) goto exit; error = -EACCES; - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) - goto exit; - - error = may_open(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN, 0); - if (error) + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; - fsnotify_open(file->f_path.dentry); error = -ENOEXEC; @@ -156,13 +148,10 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) } read_unlock(&binfmt_lock); } +exit: fput(file); out: return error; -exit: - release_open_intent(&nd); - path_put(&nd.path); - goto out; } #ifdef CONFIG_MMU @@ -657,44 +646,33 @@ EXPORT_SYMBOL(setup_arg_pages); struct file *open_exec(const char *name) { - struct nameidata nd; struct file *file; int err; - err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - if (err) + file = do_filp_open(AT_FDCWD, name, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_EXEC | MAY_OPEN); + if (IS_ERR(file)) goto out; err = -EACCES; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) - goto out_path_put; - - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) - goto out_path_put; - - err = may_open(&nd.path, MAY_EXEC | MAY_OPEN, 0); - if (err) - goto out_path_put; + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) + goto exit; - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - if (IS_ERR(file)) - return file; + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) + goto exit; fsnotify_open(file->f_path.dentry); err = deny_write_access(file); - if (err) { - fput(file); - goto out; - } + if (err) + goto exit; +out: return file; - out_path_put: - release_open_intent(&nd); - path_put(&nd.path); - out: +exit: + fput(file); return ERR_PTR(err); } EXPORT_SYMBOL(open_exec); diff --git a/fs/namei.c b/fs/namei.c index 78f253cd2d4..967c3db9272 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1130,8 +1130,8 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, * @nd: pointer to nameidata * @open_flags: open intent flags */ -int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, - struct nameidata *nd, int open_flags) +static int path_lookup_open(int dfd, const char *name, + unsigned int lookup_flags, struct nameidata *nd, int open_flags) { struct file *filp = get_empty_filp(); int err; @@ -1637,18 +1637,19 @@ static int open_will_write_to_fs(int flag, struct inode *inode) * open_to_namei_flags() for more details. */ struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode) + int open_flag, int mode, int acc_mode) { struct file *filp; struct nameidata nd; - int acc_mode, error; + int error; struct path path; struct dentry *dir; int count = 0; int will_write; int flag = open_to_namei_flags(open_flag); - acc_mode = MAY_OPEN | ACC_MODE(flag); + if (!acc_mode) + acc_mode = MAY_OPEN | ACC_MODE(flag); /* O_TRUNC implies we need access checks for write permissions */ if (flag & O_TRUNC) @@ -1869,7 +1870,7 @@ do_link: */ struct file *filp_open(const char *filename, int flags, int mode) { - return do_filp_open(AT_FDCWD, filename, flags, mode); + return do_filp_open(AT_FDCWD, filename, flags, mode, 0); } EXPORT_SYMBOL(filp_open); diff --git a/fs/open.c b/fs/open.c index 377eb25b6ab..bdfbf03615a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1033,7 +1033,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode) if (!IS_ERR(tmp)) { fd = get_unused_fd_flags(flags); if (fd >= 0) { - struct file *f = do_filp_open(dfd, tmp, flags, mode); + struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); diff --git a/include/linux/fs.h b/include/linux/fs.h index 11484d08042..ed788426f46 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2118,7 +2118,7 @@ extern struct file *create_write_pipe(int flags); extern void free_write_pipe(struct file *); extern struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode); + int open_flag, int mode, int acc_mode); extern int may_open(struct path *, int, int); extern int kernel_read(struct file *, unsigned long, char *, unsigned long); diff --git a/include/linux/namei.h b/include/linux/namei.h index fc2e0357987..518098fe63a 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -69,7 +69,6 @@ extern int path_lookup(const char *, unsigned, struct nameidata *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct nameidata *); -extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, int open_flags); extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); -- cgit v1.2.3-70-g09d2