From 499eea6bf9c06df3bf4549954aee6fb3427946ed Mon Sep 17 00:00:00 2001 From: Sasikantha babu Date: Thu, 31 May 2012 16:26:07 -0700 Subject: sethostname/setdomainname: notify userspace when there is a change in uts_kern_table sethostname() and setdomainname() notify userspace on failure (without modifying uts_kern_table). Change things so that we only notify userspace on success, when uts_kern_table was actually modified. Signed-off-by: Sasikantha babu Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: WANG Cong Reviewed-by: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 6df42624e45..8b71cef3bf1 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1378,8 +1378,8 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) memcpy(u->nodename, tmp, len); memset(u->nodename + len, 0, sizeof(u->nodename) - len); errno = 0; + uts_proc_notify(UTS_PROC_HOSTNAME); } - uts_proc_notify(UTS_PROC_HOSTNAME); up_write(&uts_sem); return errno; } @@ -1429,8 +1429,8 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) memcpy(u->domainname, tmp, len); memset(u->domainname + len, 0, sizeof(u->domainname) - len); errno = 0; + uts_proc_notify(UTS_PROC_DOMAINNAME); } - uts_proc_notify(UTS_PROC_DOMAINNAME); up_write(&uts_sem); return errno; } -- cgit v1.2.3-70-g09d2 From 81ab6e7b26b453a795d46f2616ed0e31d97f05b9 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 31 May 2012 16:26:15 -0700 Subject: kmod: convert two call sites to call_usermodehelper_fns() Both kernel/sys.c and security/keys/request_key.c were inlining the exact same code as call_usermodehelper_fns(), so simply convert these sites to use call_usermodehelper_fns() directly. 
Signed-off-by: Boaz Harrosh Cc: Oleg Nesterov Cc: Tetsuo Handa Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 19 ++++++++----------- security/keys/request_key.c | 13 +++---------- 2 files changed, 11 insertions(+), 21 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 8b71cef3bf1..6e81aa7e468 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2114,7 +2114,6 @@ int orderly_poweroff(bool force) NULL }; int ret = -ENOMEM; - struct subprocess_info *info; if (argv == NULL) { printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", @@ -2122,18 +2121,16 @@ int orderly_poweroff(bool force) goto out; } - info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC); - if (info == NULL) { - argv_free(argv); - goto out; - } - - call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL); + ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT, + NULL, argv_cleanup, NULL); +out: + if (likely(!ret)) + return 0; - ret = call_usermodehelper_exec(info, UMH_NO_WAIT); + if (ret == -ENOMEM) + argv_free(argv); - out: - if (ret && force) { + if (force) { printk(KERN_WARNING "Failed to start orderly shutdown: " "forcing the issue\n"); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index cc3790315d2..000e7501752 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -93,16 +93,9 @@ static void umh_keys_cleanup(struct subprocess_info *info) static int call_usermodehelper_keys(char *path, char **argv, char **envp, struct key *session_keyring, int wait) { - gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? 
GFP_ATOMIC : GFP_KERNEL; - struct subprocess_info *info = - call_usermodehelper_setup(path, argv, envp, gfp_mask); - - if (!info) - return -ENOMEM; - - call_usermodehelper_setfns(info, umh_keys_init, umh_keys_cleanup, - key_get(session_keyring)); - return call_usermodehelper_exec(info, wait); + return call_usermodehelper_fns(path, argv, envp, wait, + umh_keys_init, umh_keys_cleanup, + key_get(session_keyring)); } /* -- cgit v1.2.3-70-g09d2 From fe8c7f5cbf91124987106faa3bdf0c8b955c4cf7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 31 May 2012 16:26:45 -0700 Subject: c/r: prctl: extend PR_SET_MM to set up more mm_struct entries During checkpoint we dump the whole process memory to a file, and the dump includes the process stack memory. But besides the stack data itself, the stack carries additional parameters such as command line arguments, environment data and the auxiliary vector. So during restore, once we've restored the stack data itself, we need to set up mm_struct::arg_start/end and env_start/end, so that the restored process is able to find the command line arguments and environment data it had at checkpoint time. The same applies to the auxiliary vector. For this reason, additional PR_SET_MM_(ARG_START | ARG_END | ENV_START | ENV_END | AUXV) codes are introduced. 
Signed-off-by: Cyrill Gorcunov Acked-by: Kees Cook Cc: Tejun Heo Cc: Andrew Vagin Cc: Serge Hallyn Cc: Pavel Emelyanov Cc: Vasiliy Kulikov Cc: KAMEZAWA Hiroyuki Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 5 ++ kernel/sys.c | 134 +++++++++++++++++++++++++++++++------------------- 2 files changed, 88 insertions(+), 51 deletions(-) (limited to 'kernel/sys.c') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 78b76e24cc7..18d84c4b42d 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -113,6 +113,11 @@ # define PR_SET_MM_START_STACK 5 # define PR_SET_MM_START_BRK 6 # define PR_SET_MM_BRK 7 +# define PR_SET_MM_ARG_START 8 +# define PR_SET_MM_ARG_END 9 +# define PR_SET_MM_ENV_START 10 +# define PR_SET_MM_ENV_END 11 +# define PR_SET_MM_AUXV 12 /* * Set specific pid that is allowed to ptrace the current task. diff --git a/kernel/sys.c b/kernel/sys.c index 6e81aa7e468..8b544972e46 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1784,17 +1784,23 @@ SYSCALL_DEFINE1(umask, int, mask) } #ifdef CONFIG_CHECKPOINT_RESTORE +static bool vma_flags_mismatch(struct vm_area_struct *vma, + unsigned long required, + unsigned long banned) +{ + return (vma->vm_flags & required) != required || + (vma->vm_flags & banned); +} + static int prctl_set_mm(int opt, unsigned long addr, unsigned long arg4, unsigned long arg5) { unsigned long rlim = rlimit(RLIMIT_DATA); - unsigned long vm_req_flags; - unsigned long vm_bad_flags; - struct vm_area_struct *vma; - int error = 0; struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int error; - if (arg4 | arg5) + if (arg5 || (arg4 && opt != PR_SET_MM_AUXV)) return -EINVAL; if (!capable(CAP_SYS_RESOURCE)) @@ -1803,58 +1809,23 @@ static int prctl_set_mm(int opt, unsigned long addr, if (addr >= TASK_SIZE) return -EINVAL; + error = -EINVAL; + down_read(&mm->mmap_sem); vma = find_vma(mm, addr); - if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) { - 
/* It must be existing VMA */ - if (!vma || vma->vm_start > addr) - goto out; - } - - error = -EINVAL; switch (opt) { case PR_SET_MM_START_CODE: + mm->start_code = addr; + break; case PR_SET_MM_END_CODE: - vm_req_flags = VM_READ | VM_EXEC; - vm_bad_flags = VM_WRITE | VM_MAYSHARE; - - if ((vma->vm_flags & vm_req_flags) != vm_req_flags || - (vma->vm_flags & vm_bad_flags)) - goto out; - - if (opt == PR_SET_MM_START_CODE) - mm->start_code = addr; - else - mm->end_code = addr; + mm->end_code = addr; break; - case PR_SET_MM_START_DATA: - case PR_SET_MM_END_DATA: - vm_req_flags = VM_READ | VM_WRITE; - vm_bad_flags = VM_EXEC | VM_MAYSHARE; - - if ((vma->vm_flags & vm_req_flags) != vm_req_flags || - (vma->vm_flags & vm_bad_flags)) - goto out; - - if (opt == PR_SET_MM_START_DATA) - mm->start_data = addr; - else - mm->end_data = addr; + mm->start_data = addr; break; - - case PR_SET_MM_START_STACK: - -#ifdef CONFIG_STACK_GROWSUP - vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP; -#else - vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN; -#endif - if ((vma->vm_flags & vm_req_flags) != vm_req_flags) - goto out; - - mm->start_stack = addr; + case PR_SET_MM_END_DATA: + mm->end_data = addr; break; case PR_SET_MM_START_BRK: @@ -1881,16 +1852,77 @@ static int prctl_set_mm(int opt, unsigned long addr, mm->brk = addr; break; + /* + * If command line arguments and environment + * are placed somewhere else on stack, we can + * set them up here, ARG_START/END to setup + * command line argumets and ENV_START/END + * for environment. 
+ */ + case PR_SET_MM_START_STACK: + case PR_SET_MM_ARG_START: + case PR_SET_MM_ARG_END: + case PR_SET_MM_ENV_START: + case PR_SET_MM_ENV_END: + if (!vma) { + error = -EFAULT; + goto out; + } +#ifdef CONFIG_STACK_GROWSUP + if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0)) +#else + if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0)) +#endif + goto out; + if (opt == PR_SET_MM_START_STACK) + mm->start_stack = addr; + else if (opt == PR_SET_MM_ARG_START) + mm->arg_start = addr; + else if (opt == PR_SET_MM_ARG_END) + mm->arg_end = addr; + else if (opt == PR_SET_MM_ENV_START) + mm->env_start = addr; + else if (opt == PR_SET_MM_ENV_END) + mm->env_end = addr; + break; + + /* + * This doesn't move auxiliary vector itself + * since it's pinned to mm_struct, but allow + * to fill vector with new values. It's up + * to a caller to provide sane values here + * otherwise user space tools which use this + * vector might be unhappy. + */ + case PR_SET_MM_AUXV: { + unsigned long user_auxv[AT_VECTOR_SIZE]; + + if (arg4 > sizeof(user_auxv)) + goto out; + up_read(&mm->mmap_sem); + + if (copy_from_user(user_auxv, (const void __user *)addr, arg4)) + return -EFAULT; + + /* Make sure the last entry is always AT_NULL */ + user_auxv[AT_VECTOR_SIZE - 2] = 0; + user_auxv[AT_VECTOR_SIZE - 1] = 0; + + BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); + + task_lock(current); + memcpy(mm->saved_auxv, user_auxv, arg4); + task_unlock(current); + + return 0; + } default: - error = -EINVAL; goto out; } error = 0; - out: up_read(&mm->mmap_sem); - return error; } #else /* CONFIG_CHECKPOINT_RESTORE */ -- cgit v1.2.3-70-g09d2 From b32dfe377102ce668775f8b6b1461f7ad428f8b6 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 31 May 2012 16:26:46 -0700 Subject: c/r: prctl: add ability to set new mm_struct::exe_file When we do restore we would like to have a way to setup a former mm_struct::exe_file so that /proc/pid/exe would point to the original executable file 
a process had at checkpoint time. For this the PR_SET_MM_EXE_FILE code is introduced. This option takes a file descriptor which will be set as the source for the new /proc/$pid/exe symlink. Note that changing /proc/$pid/exe is allowed only if there are no VM_EXECUTABLE vmas present for the current process, simply because this feature is specific to C/R and mm::num_exe_file_vmas becomes meaningless after that. To minimize the number of transitions the /proc/pid/exe symlink might go through, this feature is implemented in a one-shot manner. Thus, once changed, the symlink can't be changed again. This should help sysadmins monitor the symlinks of all processes running in a system. In particular, one could make a snapshot of processes and raise an alarm if there are unexpected changes of /proc/pid/exe's in a system. Note -- this feature is available only if CONFIG_CHECKPOINT_RESTORE is set, and the caller must have the CAP_SYS_RESOURCE capability granted; otherwise the request to change the symlink will be rejected. Signed-off-by: Cyrill Gorcunov Reviewed-by: Oleg Nesterov Cc: KOSAKI Motohiro Cc: Pavel Emelyanov Cc: Kees Cook Cc: Tejun Heo Cc: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 1 + kernel/sys.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'kernel/sys.c') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 18d84c4b42d..711e0a30aac 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -118,6 +118,7 @@ # define PR_SET_MM_ENV_START 10 # define PR_SET_MM_ENV_END 11 # define PR_SET_MM_AUXV 12 +# define PR_SET_MM_EXE_FILE 13 /* * Set specific pid that is allowed to ptrace the current task. 
diff --git a/kernel/sys.c b/kernel/sys.c index 8b544972e46..9ff89cb9657 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include #include @@ -1792,6 +1794,57 @@ static bool vma_flags_mismatch(struct vm_area_struct *vma, (vma->vm_flags & banned); } +static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) +{ + struct file *exe_file; + struct dentry *dentry; + int err; + + /* + * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's + * remain. So perform a quick test first. + */ + if (mm->num_exe_file_vmas) + return -EBUSY; + + exe_file = fget(fd); + if (!exe_file) + return -EBADF; + + dentry = exe_file->f_path.dentry; + + /* + * Because the original mm->exe_file points to executable file, make + * sure that this one is executable as well, to avoid breaking an + * overall picture. + */ + err = -EACCES; + if (!S_ISREG(dentry->d_inode->i_mode) || + exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC) + goto exit; + + err = inode_permission(dentry->d_inode, MAY_EXEC); + if (err) + goto exit; + + /* + * The symlink can be changed only once, just to disallow arbitrary + * transitions malicious software might bring in. This means one + * could make a snapshot over all processes running and monitor + * /proc/pid/exe changes to notice unusual activity if needed. + */ + down_write(&mm->mmap_sem); + if (likely(!mm->exe_file)) + set_mm_exe_file(mm, exe_file); + else + err = -EBUSY; + up_write(&mm->mmap_sem); + +exit: + fput(exe_file); + return err; +} + static int prctl_set_mm(int opt, unsigned long addr, unsigned long arg4, unsigned long arg5) { @@ -1806,6 +1859,9 @@ static int prctl_set_mm(int opt, unsigned long addr, if (!capable(CAP_SYS_RESOURCE)) return -EPERM; + if (opt == PR_SET_MM_EXE_FILE) + return prctl_set_mm_exe_file(mm, (unsigned int)addr); + if (addr >= TASK_SIZE) return -EINVAL; -- cgit v1.2.3-70-g09d2