diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/events/uprobes.c | 213 | ||||
-rw-r--r-- | kernel/fork.c | 40 | ||||
-rw-r--r-- | kernel/kexec.c | 2 | ||||
-rw-r--r-- | kernel/kmod.c | 37 | ||||
-rw-r--r-- | kernel/panic.c | 8 | ||||
-rw-r--r-- | kernel/power/suspend.c | 3 | ||||
-rw-r--r-- | kernel/printk.c | 32 | ||||
-rw-r--r-- | kernel/resource.c | 24 | ||||
-rw-r--r-- | kernel/sched/core.c | 2 | ||||
-rw-r--r-- | kernel/sys.c | 57 | ||||
-rw-r--r-- | kernel/sysctl.c | 43 | ||||
-rw-r--r-- | kernel/taskstats.c | 5 | ||||
-rw-r--r-- | kernel/watchdog.c | 21 |
13 files changed, 298 insertions, 189 deletions
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f93532748bc..c08a22d02f7 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -32,6 +32,7 @@ #include <linux/swap.h> /* try_to_free_swap */ #include <linux/ptrace.h> /* user_enable_single_step */ #include <linux/kdebug.h> /* notifier mechanism */ +#include "../../mm/internal.h" /* munlock_vma_page */ #include <linux/uprobes.h> @@ -112,14 +113,14 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register) return false; } -static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) +static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset) { - loff_t vaddr; - - vaddr = vma->vm_start + offset; - vaddr -= vma->vm_pgoff << PAGE_SHIFT; + return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT); +} - return vaddr; +static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr) +{ + return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start); } /** @@ -127,25 +128,27 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) * based on replace_page in mm/ksm.c * * @vma: vma that holds the pte pointing to page + * @addr: address the old @page is mapped at * @page: the cowed page we are replacing by kpage * @kpage: the modified page we replace page by * * Returns 0 on success, -EFAULT on failure. */ -static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) +static int __replace_page(struct vm_area_struct *vma, unsigned long addr, + struct page *page, struct page *kpage) { struct mm_struct *mm = vma->vm_mm; - unsigned long addr; spinlock_t *ptl; pte_t *ptep; + int err; - addr = page_address_in_vma(page, vma); - if (addr == -EFAULT) - return -EFAULT; + /* For try_to_free_swap() and munlock_vma_page() below */ + lock_page(page); + err = -EAGAIN; ptep = page_check_address(page, mm, addr, &ptl, 0); if (!ptep) - return -EAGAIN; + goto unlock; get_page(kpage); page_add_new_anon_rmap(kpage, vma, addr); @@ -162,10 +165,16 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page_remove_rmap(page); if (!page_mapped(page)) try_to_free_swap(page); - put_page(page); pte_unmap_unlock(ptep, ptl); - return 0; + if (vma->vm_flags & VM_LOCKED) + munlock_vma_page(page); + put_page(page); + + err = 0; + unlock: + unlock_page(page); + return err; } /** @@ -206,45 +215,23 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { struct page *old_page, *new_page; - struct address_space *mapping; void *vaddr_old, *vaddr_new; struct vm_area_struct *vma; - struct uprobe *uprobe; int ret; + retry: /* Read the page with vaddr into memory */ ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); if (ret <= 0) return ret; - ret = -EINVAL; - - /* - * We are interested in text pages only. Our pages of interest - * should be mapped for read and execute only. We desist from - * adding probes in write mapped pages since the breakpoints - * might end up in the file copy. - */ - if (!valid_vma(vma, is_swbp_insn(&opcode))) - goto put_out; - - uprobe = container_of(auprobe, struct uprobe, arch); - mapping = uprobe->inode->i_mapping; - if (mapping != vma->vm_file->f_mapping) - goto put_out; - ret = -ENOMEM; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (!new_page) - goto put_out; + goto put_old; __SetPageUptodate(new_page); - /* - * lock page will serialize against do_wp_page()'s - * PageAnon() handling - */ - lock_page(old_page); /* copy the page now that we've got it stable */ vaddr_old = kmap_atomic(old_page); vaddr_new = kmap_atomic(new_page); @@ -257,17 +244,13 @@ retry: ret = anon_vma_prepare(vma); if (ret) - goto unlock_out; + goto put_new; - lock_page(new_page); - ret = __replace_page(vma, old_page, new_page); - unlock_page(new_page); + ret = __replace_page(vma, vaddr, old_page, new_page); -unlock_out: - unlock_page(old_page); +put_new: page_cache_release(new_page); - -put_out: +put_old: put_page(old_page); if (unlikely(ret == -EAGAIN)) @@ -791,7 +774,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) curr = info; info->mm = vma->vm_mm; - info->vaddr = vma_address(vma, offset); + info->vaddr = offset_to_vaddr(vma, offset); } mutex_unlock(&mapping->i_mmap_mutex); @@ -839,12 +822,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) goto free; down_write(&mm->mmap_sem); - vma = find_vma(mm, (unsigned long)info->vaddr); - if (!vma || !valid_vma(vma, is_register)) + vma = find_vma(mm, info->vaddr); + if (!vma || !valid_vma(vma, is_register) || + vma->vm_file->f_mapping->host != uprobe->inode) goto unlock; - if (vma->vm_file->f_mapping->host != uprobe->inode || - vma_address(vma, uprobe->offset) != info->vaddr) + if (vma->vm_start > info->vaddr || + vaddr_to_offset(vma, info->vaddr) != uprobe->offset) goto unlock; if (is_register) { @@ -960,59 +944,66 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume put_uprobe(uprobe); } -/* - * Of all the nodes that correspond to the given inode, return the node - * with the least offset. - */ -static struct rb_node *find_least_offset_node(struct inode *inode) +static struct rb_node * +find_node_in_range(struct inode *inode, loff_t min, loff_t max) { - struct uprobe u = { .inode = inode, .offset = 0}; struct rb_node *n = uprobes_tree.rb_node; - struct rb_node *close_node = NULL; - struct uprobe *uprobe; - int match; while (n) { - uprobe = rb_entry(n, struct uprobe, rb_node); - match = match_uprobe(&u, uprobe); - - if (uprobe->inode == inode) - close_node = n; - - if (!match) - return close_node; + struct uprobe *u = rb_entry(n, struct uprobe, rb_node); - if (match < 0) + if (inode < u->inode) { n = n->rb_left; - else + } else if (inode > u->inode) { n = n->rb_right; + } else { + if (max < u->offset) + n = n->rb_left; + else if (min > u->offset) + n = n->rb_right; + else + break; + } } - return close_node; + return n; } /* - * For a given inode, build a list of probes that need to be inserted. + * For a given range in vma, build a list of probes that need to be inserted. */ -static void build_probe_list(struct inode *inode, struct list_head *head) +static void build_probe_list(struct inode *inode, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct list_head *head) { - struct uprobe *uprobe; + loff_t min, max; unsigned long flags; - struct rb_node *n; - - spin_lock_irqsave(&uprobes_treelock, flags); - - n = find_least_offset_node(inode); + struct rb_node *n, *t; + struct uprobe *u; - for (; n; n = rb_next(n)) { - uprobe = rb_entry(n, struct uprobe, rb_node); - if (uprobe->inode != inode) - break; + INIT_LIST_HEAD(head); + min = vaddr_to_offset(vma, start); + max = min + (end - start) - 1; - list_add(&uprobe->pending_list, head); - atomic_inc(&uprobe->ref); + spin_lock_irqsave(&uprobes_treelock, flags); + n = find_node_in_range(inode, min, max); + if (n) { + for (t = n; t; t = rb_prev(t)) { + u = rb_entry(t, struct uprobe, rb_node); + if (u->inode != inode || u->offset < min) + break; + list_add(&u->pending_list, head); + atomic_inc(&u->ref); + } + for (t = n; (t = rb_next(t)); ) { + u = rb_entry(t, struct uprobe, rb_node); + if (u->inode != inode || u->offset > max) + break; + list_add(&u->pending_list, head); + atomic_inc(&u->ref); + } } - spin_unlock_irqrestore(&uprobes_treelock, flags); } @@ -1031,7 +1022,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head) int uprobe_mmap(struct vm_area_struct *vma) { struct list_head tmp_list; - struct uprobe *uprobe; + struct uprobe *uprobe, *u; struct inode *inode; int ret, count; @@ -1042,21 +1033,15 @@ int uprobe_mmap(struct vm_area_struct *vma) if (!inode) return 0; - INIT_LIST_HEAD(&tmp_list); mutex_lock(uprobes_mmap_hash(inode)); - build_probe_list(inode, &tmp_list); + build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); ret = 0; count = 0; - list_for_each_entry(uprobe, &tmp_list, pending_list) { + list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { if (!ret) { - loff_t vaddr = vma_address(vma, uprobe->offset); - - if (vaddr < vma->vm_start || vaddr >= vma->vm_end) { - put_uprobe(uprobe); - continue; - } + unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); /* @@ -1097,12 +1082,15 @@ int uprobe_mmap(struct vm_area_struct *vma) void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct list_head tmp_list; - struct uprobe *uprobe; + struct uprobe *uprobe, *u; struct inode *inode; if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) return; + if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ + return; + if (!atomic_read(&vma->vm_mm->uprobes_state.count)) return; @@ -1110,21 +1098,17 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon if (!inode) return; - INIT_LIST_HEAD(&tmp_list); mutex_lock(uprobes_mmap_hash(inode)); - build_probe_list(inode, &tmp_list); - - list_for_each_entry(uprobe, &tmp_list, pending_list) { - loff_t vaddr = vma_address(vma, uprobe->offset); - - if (vaddr >= start && vaddr < end) { - /* - * An unregister could have removed the probe before - * unmap. So check before we decrement the count. - */ - if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1) - atomic_dec(&vma->vm_mm->uprobes_state.count); - } + build_probe_list(inode, vma, start, end, &tmp_list); + + list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { + unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); + /* + * An unregister could have removed the probe before + * unmap. So check before we decrement the count. + */ + if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1) + atomic_dec(&vma->vm_mm->uprobes_state.count); put_uprobe(uprobe); } mutex_unlock(uprobes_mmap_hash(inode)); @@ -1463,12 +1447,9 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) vma = find_vma(mm, bp_vaddr); if (vma && vma->vm_start <= bp_vaddr) { if (valid_vma(vma, false)) { - struct inode *inode; - loff_t offset; + struct inode *inode = vma->vm_file->f_mapping->host; + loff_t offset = vaddr_to_offset(vma, bp_vaddr); - inode = vma->vm_file->f_mapping->host; - offset = bp_vaddr - vma->vm_start; - offset += (vma->vm_pgoff << PAGE_SHIFT); uprobe = find_uprobe(inode, offset); } diff --git a/kernel/fork.c b/kernel/fork.c index ff1cad3b7bd..8efac1fe56b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -114,6 +114,10 @@ int nr_processes(void) return total; } +void __weak arch_release_task_struct(struct task_struct *tsk) +{ +} + #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR static struct kmem_cache *task_struct_cachep; @@ -122,17 +126,17 @@ static inline struct task_struct *alloc_task_struct_node(int node) return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node); } -void __weak arch_release_task_struct(struct task_struct *tsk) { } - static inline void free_task_struct(struct task_struct *tsk) { - arch_release_task_struct(tsk); kmem_cache_free(task_struct_cachep, tsk); } #endif +void __weak arch_release_thread_info(struct thread_info *ti) +{ +} + #ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR -void __weak arch_release_thread_info(struct thread_info *ti) { } /* * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a @@ -150,7 +154,6 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, static inline void free_thread_info(struct thread_info *ti) { - arch_release_thread_info(ti); free_pages((unsigned long)ti, THREAD_SIZE_ORDER); } # else @@ -164,7 +167,6 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, static void free_thread_info(struct thread_info *ti) { - arch_release_thread_info(ti); kmem_cache_free(thread_info_cache, ti); } @@ -205,10 +207,12 @@ static void account_kernel_stack(struct thread_info *ti, int account) void free_task(struct task_struct *tsk) { account_kernel_stack(tsk->stack, -1); + arch_release_thread_info(tsk->stack); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); put_seccomp_filter(tsk); + arch_release_task_struct(tsk); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -298,23 +302,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) return NULL; ti = alloc_thread_info_node(tsk, node); - if (!ti) { - free_task_struct(tsk); - return NULL; - } + if (!ti) + goto free_tsk; err = arch_dup_task_struct(tsk, orig); + if (err) + goto free_ti; - /* - * We defer looking at err, because we will need this setup - * for the clean up path to work correctly. - */ tsk->stack = ti; - setup_thread_stack(tsk, orig); - - if (err) - goto out; + setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); clear_tsk_need_resched(tsk); stackend = end_of_stack(tsk); @@ -338,8 +335,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) return tsk; -out: +free_ti: free_thread_info(ti); +free_tsk: free_task_struct(tsk); return NULL; } @@ -391,8 +389,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) } charge = 0; if (mpnt->vm_flags & VM_ACCOUNT) { - unsigned long len; - len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + unsigned long len = vma_pages(mpnt); + if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ goto fail_nomem; charge = len; diff --git a/kernel/kexec.c b/kernel/kexec.c index 4e2e472f6ae..0668d58d641 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1424,7 +1424,7 @@ static void update_vmcoreinfo_note(void) void crash_save_vmcoreinfo(void) { - vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds()); + vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds()); update_vmcoreinfo_note(); } diff --git a/kernel/kmod.c b/kernel/kmod.c index ff2c7cb86d7..6f99aead66c 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -45,6 +45,13 @@ extern int max_threads; static struct workqueue_struct *khelper_wq; +/* + * kmod_thread_locker is used for deadlock avoidance. There is no explicit + * locking to protect this global - it is private to the singleton khelper + * thread and should only ever be modified by that thread. + */ +static const struct task_struct *kmod_thread_locker; + #define CAP_BSET (void *)1 #define CAP_PI (void *)2 @@ -221,6 +228,13 @@ fail: return 0; } +static int call_helper(void *data) +{ + /* Worker thread started blocking khelper thread. */ + kmod_thread_locker = current; + return ____call_usermodehelper(data); +} + static void call_usermodehelper_freeinfo(struct subprocess_info *info) { if (info->cleanup) @@ -295,9 +309,12 @@ static void __call_usermodehelper(struct work_struct *work) if (wait == UMH_WAIT_PROC) pid = kernel_thread(wait_for_helper, sub_info, CLONE_FS | CLONE_FILES | SIGCHLD); - else - pid = kernel_thread(____call_usermodehelper, sub_info, + else { + pid = kernel_thread(call_helper, sub_info, CLONE_VFORK | SIGCHLD); + /* Worker thread stopped blocking khelper thread. */ + kmod_thread_locker = NULL; + } switch (wait) { case UMH_NO_WAIT: @@ -548,6 +565,16 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) retval = -EBUSY; goto out; } + /* + * Worker thread must not wait for khelper thread at below + * wait_for_completion() if the thread was created with CLONE_VFORK + * flag, for khelper thread is already waiting for the thread at + * wait_for_completion() in do_fork(). + */ + if (wait != UMH_NO_WAIT && current == kmod_thread_locker) { + retval = -EBUSY; + goto out; + } sub_info->complete = &done; sub_info->wait = wait; @@ -577,6 +604,12 @@ unlock: return retval; } +/* + * call_usermodehelper_fns() will not run the caller-provided cleanup function + * if a memory allocation failure is experienced. So the caller might need to + * check the call_usermodehelper_fns() return value: if it is -ENOMEM, perform + * the necessaary cleanup within the caller. + */ int call_usermodehelper_fns( char *path, char **argv, char **envp, int wait, int (*init)(struct subprocess_info *info, struct cred *new), diff --git a/kernel/panic.c b/kernel/panic.c index d2a5f4ecc6d..e1b2822fff9 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -75,6 +75,14 @@ void panic(const char *fmt, ...) int state = 0; /* + * Disable local interrupts. This will prevent panic_smp_self_stop + * from deadlocking the first cpu that invokes the panic, since + * there is nothing to prevent an interrupt handler (that runs + * after the panic_lock is acquired) from invoking panic again. + */ + local_irq_disable(); + + /* * It's possible to come here directly from a panic-assertion and * not have preempt disabled. Some functions called from here want * preempt to be disabled. No point enabling it later though... diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c8b7446b27d..1da39ea248f 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -178,6 +178,9 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) arch_suspend_enable_irqs(); BUG_ON(irqs_disabled()); + /* Kick the lockup detector */ + lockup_detector_bootcpu_resume(); + Enable_cpus: enable_nonboot_cpus(); diff --git a/kernel/printk.c b/kernel/printk.c index 50c96b5651b..6a76ab9d447 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -389,8 +389,10 @@ static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv, line = buf; for (i = 0; i < count; i++) { - if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) + if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) { + ret = -EFAULT; goto out; + } line += iv[i].iov_len; } @@ -1540,17 +1542,23 @@ asmlinkage int vprintk_emit(int facility, int level, lflags |= LOG_NEWLINE; } - /* strip syslog prefix and extract log level or control flags */ - if (text[0] == '<' && text[1] && text[2] == '>') { - switch (text[1]) { - case '0' ... '7': - if (level == -1) - level = text[1] - '0'; - case 'd': /* KERN_DEFAULT */ - lflags |= LOG_PREFIX; - case 'c': /* KERN_CONT */ - text += 3; - text_len -= 3; + /* strip kernel syslog prefix and extract log level or control flags */ + if (facility == 0) { + int kern_level = printk_get_level(text); + + if (kern_level) { + const char *end_of_header = printk_skip_level(text); + switch (kern_level) { + case '0' ... '7': + if (level == -1) + level = kern_level - '0'; + case 'd': /* KERN_DEFAULT */ + lflags |= LOG_PREFIX; + case 'c': /* KERN_CONT */ + break; + } + text_len -= end_of_header - text; + text = (char *)end_of_header; } } diff --git a/kernel/resource.c b/kernel/resource.c index dc8b4776444..34d45886ee8 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -7,6 +7,8 @@ * Arbitrary resource management. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/export.h> #include <linux/errno.h> #include <linux/ioport.h> @@ -791,8 +793,28 @@ void __init reserve_region_with_split(struct resource *root, resource_size_t start, resource_size_t end, const char *name) { + int abort = 0; + write_lock(&resource_lock); - __reserve_region_with_split(root, start, end, name); + if (root->start > start || root->end < end) { + pr_err("requested range [0x%llx-0x%llx] not in root %pr\n", + (unsigned long long)start, (unsigned long long)end, + root); + if (start > root->end || end < root->start) + abort = 1; + else { + if (end > root->end) + end = root->end; + if (start < root->start) + start = root->start; + pr_err("fixing request to [0x%llx-0x%llx]\n", + (unsigned long long)start, + (unsigned long long)end); + } + dump_stack(); + } + if (!abort) + __reserve_region_with_split(root, start, end, name); write_unlock(&resource_lock); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5d011ef4c0d..d325c4b2dcb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1910,12 +1910,12 @@ static inline void prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { + trace_sched_switch(prev, next); sched_info_switch(prev, next); perf_event_task_sched_out(prev, next); fire_sched_out_preempt_notifiers(prev, next); prepare_lock_switch(rq, next); prepare_arch_switch(next); - trace_sched_switch(prev, next); } /** diff --git a/kernel/sys.c b/kernel/sys.c index 2d39a84cd85..241507f23ec 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2015,7 +2015,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, break; } me->pdeath_signal = arg2; - error = 0; break; case PR_GET_PDEATHSIG: error = put_user(me->pdeath_signal, (int __user *)arg2); @@ -2029,7 +2028,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, break; } set_dumpable(me->mm, arg2); - error = 0; break; case PR_SET_UNALIGN: @@ -2056,10 +2054,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SET_TIMING: if (arg2 != PR_TIMING_STATISTICAL) error = -EINVAL; - else - error = 0; break; - case PR_SET_NAME: comm[sizeof(me->comm)-1] = 0; if (strncpy_from_user(comm, (char __user *)arg2, @@ -2067,20 +2062,19 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EFAULT; set_task_comm(me, comm); proc_comm_connector(me); - return 0; + break; case PR_GET_NAME: get_task_comm(comm, me); if (copy_to_user((char __user *)arg2, comm, sizeof(comm))) return -EFAULT; - return 0; + break; case PR_GET_ENDIAN: error = GET_ENDIAN(me, arg2); break; case PR_SET_ENDIAN: error = SET_ENDIAN(me, arg2); break; - case PR_GET_SECCOMP: error = prctl_get_seccomp(); break; @@ -2108,7 +2102,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, current->default_timer_slack_ns; else current->timer_slack_ns = arg2; - error = 0; break; case PR_MCE_KILL: if (arg4 | arg5) @@ -2134,7 +2127,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, default: return -EINVAL; } - error = 0; break; case PR_MCE_KILL_GET: if (arg2 | arg3 | arg4 | arg5) @@ -2153,7 +2145,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, break; case PR_SET_CHILD_SUBREAPER: me->signal->is_child_subreaper = !!arg2; - error = 0; break; case PR_GET_CHILD_SUBREAPER: error = put_user(me->signal->is_child_subreaper, @@ -2195,46 +2186,52 @@ static void argv_cleanup(struct subprocess_info *info) argv_free(info->argv); } -/** - * orderly_poweroff - Trigger an orderly system poweroff - * @force: force poweroff if command execution fails - * - * This may be called from any context to trigger a system shutdown. - * If the orderly shutdown fails, it will force an immediate shutdown. - */ -int orderly_poweroff(bool force) +static int __orderly_poweroff(void) { int argc; - char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); + char **argv; static char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL }; - int ret = -ENOMEM; + int ret; + argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); if (argv == NULL) { printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", __func__, poweroff_cmd); - goto out; + return -ENOMEM; } ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT, NULL, argv_cleanup, NULL); -out: - if (likely(!ret)) - return 0; - if (ret == -ENOMEM) argv_free(argv); - if (force) { + return ret; +} + +/** + * orderly_poweroff - Trigger an orderly system poweroff + * @force: force poweroff if command execution fails + * + * This may be called from any context to trigger a system shutdown. + * If the orderly shutdown fails, it will force an immediate shutdown. + */ +int orderly_poweroff(bool force) +{ + int ret = __orderly_poweroff(); + + if (ret && force) { printk(KERN_WARNING "Failed to start orderly shutdown: " "forcing the issue\n"); - /* I guess this should try to kick off some daemon to - sync and poweroff asap. Or not even bother syncing - if we're doing an emergency shutdown? */ + /* + * I guess this should try to kick off some daemon to sync and + * poweroff asap. Or not even bother syncing if we're doing an + * emergency shutdown? + */ emergency_sync(); kernel_power_off(); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4ab11879aeb..97186b99b0e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -30,6 +30,7 @@ #include <linux/security.h> #include <linux/ctype.h> #include <linux/kmemcheck.h> +#include <linux/kmemleak.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> @@ -174,6 +175,11 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif +static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_dostring_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + #ifdef CONFIG_MAGIC_SYSRQ /* Note: sysrq code uses it's own private copy */ static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; @@ -410,7 +416,7 @@ static struct ctl_table kern_table[] = { .data = core_pattern, .maxlen = CORENAME_MAX_SIZE, .mode = 0644, - .proc_handler = proc_dostring, + .proc_handler = proc_dostring_coredump, }, { .procname = "core_pipe_limit", @@ -1498,7 +1504,7 @@ static struct ctl_table fs_table[] = { .data = &suid_dumpable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax_coredump, .extra1 = &zero, .extra2 = &two, }, @@ -1551,7 +1557,10 @@ static struct ctl_table dev_table[] = { int __init sysctl_init(void) { - register_sysctl_table(sysctl_base_table); + struct ctl_table_header *hdr; + + hdr = register_sysctl_table(sysctl_base_table); + kmemleak_not_leak(hdr); return 0; } @@ -2009,6 +2018,34 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, do_proc_dointvec_minmax_conv, ¶m); } +static void validate_coredump_safety(void) +{ + if (suid_dumpable == SUID_DUMPABLE_SAFE && + core_pattern[0] != '/' && core_pattern[0] != '|') { + printk(KERN_WARNING "Unsafe core_pattern used with "\ + "suid_dumpable=2. Pipe handler or fully qualified "\ + "core dump path required.\n"); + } +} + +static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (!error) + validate_coredump_safety(); + return error; +} + +static int proc_dostring_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dostring(table, write, buffer, lenp, ppos); + if (!error) + validate_coredump_safety(); + return error; +} + static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos, diff --git a/kernel/taskstats.c b/kernel/taskstats.c index e66046456f4..d0a32796550 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -436,6 +436,11 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, sizeof(struct cgroupstats)); + if (na == NULL) { + rc = -EMSGSIZE; + goto err; + } + stats = nla_data(na); memset(stats, 0, sizeof(*stats)); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 4b1dfba70f7..69add8a9da6 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -575,7 +575,7 @@ out: /* * Create/destroy watchdog threads as CPUs come and go: */ -static int __cpuinit +static int cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { int hotcpu = (unsigned long)hcpu; @@ -610,10 +610,27 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) return NOTIFY_OK; } -static struct notifier_block __cpuinitdata cpu_nfb = { +static struct notifier_block cpu_nfb = { .notifier_call = cpu_callback }; +#ifdef CONFIG_SUSPEND +/* + * On exit from suspend we force an offline->online transition on the boot CPU + * so that the PMU state that was lost while in suspended state gets set up + * properly for the boot CPU. This information is required for restarting the + * NMI watchdog. + */ +void lockup_detector_bootcpu_resume(void) +{ + void *cpu = (void *)(long)smp_processor_id(); + + cpu_callback(&cpu_nfb, CPU_DEAD_FROZEN, cpu); + cpu_callback(&cpu_nfb, CPU_UP_PREPARE_FROZEN, cpu); + cpu_callback(&cpu_nfb, CPU_ONLINE_FROZEN, cpu); +} +#endif + void __init lockup_detector_init(void) { void *cpu = (void *)(long)smp_processor_id(); |