summaryrefslogtreecommitdiffstats
path: root/fs/exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/exec.c')
-rw-r--r--fs/exec.c272
1 files changed, 212 insertions, 60 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 5e62d26a4fe..da80612a35f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -42,7 +42,6 @@
#include <linux/pid_namespace.h>
#include <linux/module.h>
#include <linux/namei.h>
-#include <linux/proc_fs.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
@@ -55,6 +54,7 @@
#include <linux/fs_struct.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
+#include <linux/compat.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -166,8 +166,13 @@ out:
}
#ifdef CONFIG_MMU
-
-void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+/*
+ * The nascent bprm->mm is not visible until exec_mmap() but it can
+ * use a lot of memory, account these pages in current->mm temporary
+ * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
+ * change the counter back via acct_arg_size(0).
+ */
+static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
struct mm_struct *mm = current->mm;
long diff = (long)(pages - bprm->vma_pages);
@@ -176,17 +181,10 @@ void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
return;
bprm->vma_pages = pages;
-
-#ifdef SPLIT_RSS_COUNTING
- add_mm_counter(mm, MM_ANONPAGES, diff);
-#else
- spin_lock(&mm->page_table_lock);
add_mm_counter(mm, MM_ANONPAGES, diff);
- spin_unlock(&mm->page_table_lock);
-#endif
}
-struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
struct page *page;
@@ -194,7 +192,7 @@ struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
#ifdef CONFIG_STACK_GROWSUP
if (write) {
- ret = expand_stack_downwards(bprm->vma, pos);
+ ret = expand_downwards(bprm->vma, pos);
if (ret < 0)
return NULL;
}
@@ -272,7 +270,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
* use STACK_TOP because that can depend on attributes which aren't
* configured yet.
*/
- BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
+ BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
vma->vm_end = STACK_TOP_MAX;
vma->vm_start = vma->vm_end - PAGE_SIZE;
vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
@@ -305,11 +303,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
#else
-void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
}
-struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
struct page *page;
@@ -398,22 +396,56 @@ err:
return err;
}
+struct user_arg_ptr {
+#ifdef CONFIG_COMPAT
+ bool is_compat;
+#endif
+ union {
+ const char __user *const __user *native;
+#ifdef CONFIG_COMPAT
+ compat_uptr_t __user *compat;
+#endif
+ } ptr;
+};
+
+static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
+{
+ const char __user *native;
+
+#ifdef CONFIG_COMPAT
+ if (unlikely(argv.is_compat)) {
+ compat_uptr_t compat;
+
+ if (get_user(compat, argv.ptr.compat + nr))
+ return ERR_PTR(-EFAULT);
+
+ return compat_ptr(compat);
+ }
+#endif
+
+ if (get_user(native, argv.ptr.native + nr))
+ return ERR_PTR(-EFAULT);
+
+ return native;
+}
+
/*
* count() counts the number of strings in array ARGV.
*/
-static int count(const char __user * const __user * argv, int max)
+static int count(struct user_arg_ptr argv, int max)
{
int i = 0;
- if (argv != NULL) {
+ if (argv.ptr.native != NULL) {
for (;;) {
- const char __user * p;
+ const char __user *p = get_user_arg_ptr(argv, i);
- if (get_user(p, argv))
- return -EFAULT;
if (!p)
break;
- argv++;
+
+ if (IS_ERR(p))
+ return -EFAULT;
+
if (i++ >= max)
return -E2BIG;
@@ -430,7 +462,7 @@ static int count(const char __user * const __user * argv, int max)
* processes's memory to the new process's stack. The call to get_user_pages()
* ensures the destination page is created and not swapped out.
*/
-static int copy_strings(int argc, const char __user *const __user *argv,
+static int copy_strings(int argc, struct user_arg_ptr argv,
struct linux_binprm *bprm)
{
struct page *kmapped_page = NULL;
@@ -443,16 +475,18 @@ static int copy_strings(int argc, const char __user *const __user *argv,
int len;
unsigned long pos;
- if (get_user(str, argv+argc) ||
- !(len = strnlen_user(str, MAX_ARG_STRLEN))) {
- ret = -EFAULT;
+ ret = -EFAULT;
+ str = get_user_arg_ptr(argv, argc);
+ if (IS_ERR(str))
goto out;
- }
- if (!valid_arg_len(bprm, len)) {
- ret = -E2BIG;
+ len = strnlen_user(str, MAX_ARG_STRLEN);
+ if (!len)
+ goto out;
+
+ ret = -E2BIG;
+ if (!valid_arg_len(bprm, len))
goto out;
- }
/* We're going to work our way backwords. */
pos = bprm->p;
@@ -519,14 +553,19 @@ out:
/*
* Like copy_strings, but get argv and its values from kernel memory.
*/
-int copy_strings_kernel(int argc, const char *const *argv,
+int copy_strings_kernel(int argc, const char *const *__argv,
struct linux_binprm *bprm)
{
int r;
mm_segment_t oldfs = get_fs();
+ struct user_arg_ptr argv = {
+ .ptr.native = (const char __user *const __user *)__argv,
+ };
+
set_fs(KERNEL_DS);
- r = copy_strings(argc, (const char __user *const __user *)argv, bprm);
+ r = copy_strings(argc, argv, bprm);
set_fs(oldfs);
+
return r;
}
EXPORT_SYMBOL(copy_strings_kernel);
@@ -553,7 +592,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
unsigned long length = old_end - old_start;
unsigned long new_start = old_start - shift;
unsigned long new_end = old_end - shift;
- struct mmu_gather *tlb;
+ struct mmu_gather tlb;
BUG_ON(new_start > new_end);
@@ -579,12 +618,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
return -ENOMEM;
lru_add_drain();
- tlb = tlb_gather_mmu(mm, 0);
+ tlb_gather_mmu(&tlb, mm, 0);
if (new_end > old_start) {
/*
* when the old and new regions overlap clear from new_end.
*/
- free_pgd_range(tlb, new_end, old_end, new_end,
+ free_pgd_range(&tlb, new_end, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : 0);
} else {
/*
@@ -593,10 +632,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* have constraints on va-space that make this illegal (IA64) -
* for the others its just a little faster.
*/
- free_pgd_range(tlb, old_start, old_end, new_end,
+ free_pgd_range(&tlb, old_start, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : 0);
}
- tlb_finish_mmu(tlb, new_end, old_end);
+ tlb_finish_mmu(&tlb, new_end, old_end);
/*
* Shrink the vma to just the new range. Always succeeds.
@@ -917,9 +956,18 @@ static int de_thread(struct task_struct *tsk)
leader->group_leader = tsk;
tsk->exit_signal = SIGCHLD;
+ leader->exit_signal = -1;
BUG_ON(leader->exit_state != EXIT_ZOMBIE);
leader->exit_state = EXIT_DEAD;
+
+ /*
+ * We are going to release_task()->ptrace_unlink() silently,
+ * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
+ * the tracer wont't block again waiting for this thread.
+ */
+ if (unlikely(leader->ptrace))
+ __wake_up_parent(leader, leader->parent);
write_unlock_irq(&tasklist_lock);
release_task(leader);
@@ -1004,6 +1052,7 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
task_unlock(tsk);
return buf;
}
+EXPORT_SYMBOL_GPL(get_task_comm);
void set_task_comm(struct task_struct *tsk, char *buf)
{
@@ -1046,6 +1095,7 @@ int flush_old_exec(struct linux_binprm * bprm)
bprm->mm = NULL; /* We're using it now */
+ set_fs(USER_DS);
current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
flush_thread();
current->personality &= ~bprm->per_clear;
@@ -1057,6 +1107,13 @@ out:
}
EXPORT_SYMBOL(flush_old_exec);
+void would_dump(struct linux_binprm *bprm, struct file *file)
+{
+ if (inode_permission(file->f_path.dentry->d_inode, MAY_READ) < 0)
+ bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+}
+EXPORT_SYMBOL(would_dump);
+
void setup_new_exec(struct linux_binprm * bprm)
{
int i, ch;
@@ -1096,9 +1153,10 @@ void setup_new_exec(struct linux_binprm * bprm)
if (bprm->cred->uid != current_euid() ||
bprm->cred->gid != current_egid()) {
current->pdeath_signal = 0;
- } else if (file_permission(bprm->file, MAY_READ) ||
- bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) {
- set_dumpable(current->mm, suid_dumpable);
+ } else {
+ would_dump(bprm, bprm->file);
+ if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
+ set_dumpable(current->mm, suid_dumpable);
}
/*
@@ -1177,7 +1235,12 @@ int check_unsafe_exec(struct linux_binprm *bprm)
unsigned n_fs;
int res = 0;
- bprm->unsafe = tracehook_unsafe_exec(p);
+ if (p->ptrace) {
+ if (p->ptrace & PT_PTRACE_CAP)
+ bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP;
+ else
+ bprm->unsafe |= LSM_UNSAFE_PTRACE;
+ }
n_fs = 1;
spin_lock(&p->fs->lock);
@@ -1305,19 +1368,21 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
unsigned int depth = bprm->recursion_depth;
int try,retval;
struct linux_binfmt *fmt;
+ pid_t old_pid;
retval = security_bprm_check(bprm);
if (retval)
return retval;
- /* kernel module loader fixup */
- /* so we don't try to load run modprobe in kernel space. */
- set_fs(USER_DS);
-
retval = audit_bprm(bprm);
if (retval)
return retval;
+ /* Need to fetch pid before load_binary changes it */
+ rcu_read_lock();
+ old_pid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
+ rcu_read_unlock();
+
retval = -ENOENT;
for (try=0; try<2; try++) {
read_lock(&binfmt_lock);
@@ -1337,7 +1402,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
bprm->recursion_depth = depth;
if (retval >= 0) {
if (depth == 0)
- tracehook_report_exec(fmt, bprm, regs);
+ ptrace_event(PTRACE_EVENT_EXEC,
+ old_pid);
put_binfmt(fmt);
allow_write_access(bprm->file);
if (bprm->file)
@@ -1357,9 +1423,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
}
}
read_unlock(&binfmt_lock);
+#ifdef CONFIG_MODULES
if (retval != -ENOEXEC || bprm->mm == NULL) {
break;
-#ifdef CONFIG_MODULES
} else {
#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
if (printable(bprm->buf[0]) &&
@@ -1367,9 +1433,13 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
printable(bprm->buf[2]) &&
printable(bprm->buf[3]))
break; /* -ENOEXEC */
+ if (try)
+ break; /* -ENOEXEC */
request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
-#endif
}
+#else
+ break;
+#endif
}
return retval;
}
@@ -1379,10 +1449,10 @@ EXPORT_SYMBOL(search_binary_handler);
/*
* sys_execve() executes a new program.
*/
-int do_execve(const char * filename,
- const char __user *const __user *argv,
- const char __user *const __user *envp,
- struct pt_regs * regs)
+static int do_execve_common(const char *filename,
+ struct user_arg_ptr argv,
+ struct user_arg_ptr envp,
+ struct pt_regs *regs)
{
struct linux_binprm *bprm;
struct file *file;
@@ -1489,6 +1559,34 @@ out_ret:
return retval;
}
+int do_execve(const char *filename,
+ const char __user *const __user *__argv,
+ const char __user *const __user *__envp,
+ struct pt_regs *regs)
+{
+ struct user_arg_ptr argv = { .ptr.native = __argv };
+ struct user_arg_ptr envp = { .ptr.native = __envp };
+ return do_execve_common(filename, argv, envp, regs);
+}
+
+#ifdef CONFIG_COMPAT
+int compat_do_execve(char *filename,
+ compat_uptr_t __user *__argv,
+ compat_uptr_t __user *__envp,
+ struct pt_regs *regs)
+{
+ struct user_arg_ptr argv = {
+ .is_compat = true,
+ .ptr.compat = __argv,
+ };
+ struct user_arg_ptr envp = {
+ .is_compat = true,
+ .ptr.compat = __envp,
+ };
+ return do_execve_common(filename, argv, envp, regs);
+}
+#endif
+
void set_binfmt(struct linux_binfmt *new)
{
struct mm_struct *mm = current->mm;
@@ -1548,6 +1646,50 @@ expand_fail:
return ret;
}
+static void cn_escape(char *str)
+{
+ for (; *str; str++)
+ if (*str == '/')
+ *str = '!';
+}
+
+static int cn_print_exe_file(struct core_name *cn)
+{
+ struct file *exe_file;
+ char *pathbuf, *path;
+ int ret;
+
+ exe_file = get_mm_exe_file(current->mm);
+ if (!exe_file) {
+ char *commstart = cn->corename + cn->used;
+ ret = cn_printf(cn, "%s (path unknown)", current->comm);
+ cn_escape(commstart);
+ return ret;
+ }
+
+ pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
+ if (!pathbuf) {
+ ret = -ENOMEM;
+ goto put_exe_file;
+ }
+
+ path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
+ if (IS_ERR(path)) {
+ ret = PTR_ERR(path);
+ goto free_buf;
+ }
+
+ cn_escape(path);
+
+ ret = cn_printf(cn, "%s", path);
+
+free_buf:
+ kfree(pathbuf);
+put_exe_file:
+ fput(exe_file);
+ return ret;
+}
+
/* format_corename will inspect the pattern parameter, and output a
* name into corename, which must have space for at least
* CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
@@ -1609,15 +1751,24 @@ static int format_corename(struct core_name *cn, long signr)
break;
}
/* hostname */
- case 'h':
+ case 'h': {
+ char *namestart = cn->corename + cn->used;
down_read(&uts_sem);
err = cn_printf(cn, "%s",
utsname()->nodename);
up_read(&uts_sem);
+ cn_escape(namestart);
break;
+ }
/* executable */
- case 'e':
+ case 'e': {
+ char *commstart = cn->corename + cn->used;
err = cn_printf(cn, "%s", current->comm);
+ cn_escape(commstart);
+ break;
+ }
+ case 'E':
+ err = cn_print_exe_file(cn);
break;
/* core limit size */
case 'c':
@@ -1659,6 +1810,7 @@ static int zap_process(struct task_struct *start, int exit_code)
t = start;
do {
+ task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
if (t != current && t->mm) {
sigaddset(&t->pending.signal, SIGKILL);
signal_wake_up(t, 1);
@@ -1885,7 +2037,7 @@ static void wait_for_dump_helpers(struct file *file)
* is a special value that we use to trap recursive
* core dumps
*/
-static int umh_pipe_setup(struct subprocess_info *info)
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
{
struct file *rp, *wp;
struct fdtable *fdt;
@@ -1978,16 +2130,16 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
ispipe = format_corename(&cn, signr);
- if (ispipe == -ENOMEM) {
- printk(KERN_WARNING "format_corename failed\n");
- printk(KERN_WARNING "Aborting core\n");
- goto fail_corename;
- }
-
if (ispipe) {
int dump_count;
char **helper_argv;
+ if (ispipe < 0) {
+ printk(KERN_WARNING "format_corename failed\n");
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_corename;
+ }
+
if (cprm.limit == 1) {
/*
* Normally core limits are irrelevant to pipes, since