diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/capability.c | 2 | ||||
-rw-r--r-- | kernel/cred.c | 16 | ||||
-rw-r--r-- | kernel/irq/migration.c | 14 | ||||
-rw-r--r-- | kernel/module.c | 16 | ||||
-rw-r--r-- | kernel/perf_event.c | 10 | ||||
-rw-r--r-- | kernel/power/main.c | 2 | ||||
-rw-r--r-- | kernel/power/process.c | 6 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 7 | ||||
-rw-r--r-- | kernel/printk.c | 54 | ||||
-rw-r--r-- | kernel/ptrace.c | 2 | ||||
-rw-r--r-- | kernel/sched_rt.c | 2 | ||||
-rw-r--r-- | kernel/time/timer_list.c | 4 | ||||
-rw-r--r-- | kernel/timer.c | 8 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 7 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 12 | ||||
-rw-r--r-- | kernel/trace/trace_export.c | 6 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 19 | ||||
-rw-r--r-- | kernel/tracepoint.c | 31 | ||||
-rw-r--r-- | kernel/watchdog.c | 53 | ||||
-rw-r--r-- | kernel/workqueue.c | 37 |
20 files changed, 178 insertions, 130 deletions
diff --git a/kernel/capability.c b/kernel/capability.c index 2f05303715a..9e9385f132c 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -306,7 +306,7 @@ int capable(int cap) BUG(); } - if (security_capable(cap) == 0) { + if (security_capable(current_cred(), cap) == 0) { current->flags |= PF_SUPERPRIV; return 1; } diff --git a/kernel/cred.c b/kernel/cred.c index 6a1aa004e37..3a9d6dd53a6 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -252,13 +252,13 @@ struct cred *cred_alloc_blank(void) #endif atomic_set(&new->usage, 1); +#ifdef CONFIG_DEBUG_CREDENTIALS + new->magic = CRED_MAGIC; +#endif if (security_cred_alloc_blank(new, GFP_KERNEL) < 0) goto error; -#ifdef CONFIG_DEBUG_CREDENTIALS - new->magic = CRED_MAGIC; -#endif return new; error: @@ -657,6 +657,8 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) validate_creds(old); *new = *old; + atomic_set(&new->usage, 1); + set_cred_subscribers(new, 0); get_uid(new->user); get_group_info(new->group_info); @@ -674,8 +676,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) if (security_prepare_creds(new, old, GFP_KERNEL) < 0) goto error; - atomic_set(&new->usage, 1); - set_cred_subscribers(new, 0); put_cred(old); validate_creds(new); return new; @@ -748,7 +748,11 @@ bool creds_are_invalid(const struct cred *cred) if (cred->magic != CRED_MAGIC) return true; #ifdef CONFIG_SECURITY_SELINUX - if (selinux_is_enabled()) { + /* + * cred->security == NULL if security_cred_alloc_blank() or + * security_prepare_creds() returned an error. + */ + if (selinux_is_enabled() && cred->security) { if ((unsigned long) cred->security < PAGE_SIZE) return true; if ((*(u32 *)cred->security & 0xffffff00) == diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 1d254194048..441fd629ff0 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -56,6 +56,7 @@ void move_masked_irq(int irq) void move_native_irq(int irq) { struct irq_desc *desc = irq_to_desc(irq); + bool masked; if (likely(!(desc->status & IRQ_MOVE_PENDING))) return; @@ -63,8 +64,15 @@ void move_native_irq(int irq) if (unlikely(desc->status & IRQ_DISABLED)) return; - desc->irq_data.chip->irq_mask(&desc->irq_data); + /* + * Be careful vs. already masked interrupts. If this is a + * threaded interrupt with ONESHOT set, we can end up with an + * interrupt storm. + */ + masked = desc->status & IRQ_MASKED; + if (!masked) + desc->irq_data.chip->irq_mask(&desc->irq_data); move_masked_irq(irq); - desc->irq_data.chip->irq_unmask(&desc->irq_data); + if (!masked) + desc->irq_data.chip->irq_unmask(&desc->irq_data); } - diff --git a/kernel/module.c b/kernel/module.c index 34e00b708fa..efa290ea94b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2460,9 +2460,9 @@ static void find_module_sections(struct module *mod, struct load_info *info) #endif #ifdef CONFIG_TRACEPOINTS - mod->tracepoints = section_objs(info, "__tracepoints", - sizeof(*mod->tracepoints), - &mod->num_tracepoints); + mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs", + sizeof(*mod->tracepoints_ptrs), + &mod->num_tracepoints); #endif #ifdef HAVE_JUMP_LABEL mod->jump_entries = section_objs(info, "__jump_table", @@ -3393,7 +3393,7 @@ void module_layout(struct module *mod, struct modversion_info *ver, struct kernel_param *kp, struct kernel_symbol *ks, - struct tracepoint *tp) + struct tracepoint * const *tp) { } EXPORT_SYMBOL(module_layout); @@ -3407,8 +3407,8 @@ void module_update_tracepoints(void) mutex_lock(&module_mutex); list_for_each_entry(mod, &modules, list) if (!mod->taints) - tracepoint_update_probe_range(mod->tracepoints, - mod->tracepoints + mod->num_tracepoints); + tracepoint_update_probe_range(mod->tracepoints_ptrs, + mod->tracepoints_ptrs + mod->num_tracepoints); mutex_unlock(&module_mutex); } @@ -3432,8 +3432,8 @@ int module_get_iter_tracepoints(struct tracepoint_iter *iter) else if (iter_mod > iter->module) iter->tracepoint = NULL; found = tracepoint_get_iter_range(&iter->tracepoint, - iter_mod->tracepoints, - iter_mod->tracepoints + iter_mod->tracepoints_ptrs, + iter_mod->tracepoints_ptrs + iter_mod->num_tracepoints); if (found) { iter->module = iter_mod; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 126a302c481..999835b6112 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1901,11 +1901,12 @@ static void __perf_event_read(void *info) return; raw_spin_lock(&ctx->lock); - update_context_time(ctx); + if (ctx->is_active) + update_context_time(ctx); update_event_times(event); + if (event->state == PERF_EVENT_STATE_ACTIVE) + event->pmu->read(event); raw_spin_unlock(&ctx->lock); - - event->pmu->read(event); } static inline u64 perf_event_count(struct perf_event *event) @@ -1999,8 +2000,7 @@ static int alloc_callchain_buffers(void) * accessed from NMI. Use a temporary manual per cpu allocation * until that gets sorted out. */ - size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * - num_possible_cpus(); + size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]); entries = kzalloc(size, GFP_KERNEL); if (!entries) diff --git a/kernel/power/main.c b/kernel/power/main.c index 7b5db6a8561..701853042c2 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -326,7 +326,7 @@ EXPORT_SYMBOL_GPL(pm_wq); static int __init pm_start_workqueue(void) { - pm_wq = alloc_workqueue("pm", WQ_FREEZEABLE, 0); + pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0); return pm_wq ? 0 : -ENOMEM; } diff --git a/kernel/power/process.c b/kernel/power/process.c index d6d2a10320e..0cf3a27a6c9 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -22,7 +22,7 @@ */ #define TIMEOUT (20 * HZ) -static inline int freezeable(struct task_struct * p) +static inline int freezable(struct task_struct * p) { if ((p == current) || (p->flags & PF_NOFREEZE) || @@ -53,7 +53,7 @@ static int try_to_freeze_tasks(bool sig_only) todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { - if (frozen(p) || !freezeable(p)) + if (frozen(p) || !freezable(p)) continue; if (!freeze_task(p, sig_only)) @@ -167,7 +167,7 @@ static void thaw_tasks(bool nosig_only) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (!freezeable(p)) + if (!freezable(p)) continue; if (nosig_only && should_send_signal(p)) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 0dac75ea445..64db648ff91 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1519,11 +1519,8 @@ static int swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, unsigned int nr_pages, unsigned int nr_highmem) { - int error = 0; - if (nr_highmem > 0) { - error = get_highmem_buffer(PG_ANY); - if (error) + if (get_highmem_buffer(PG_ANY)) goto err_out; if (nr_highmem > alloc_highmem) { nr_highmem -= alloc_highmem; @@ -1546,7 +1543,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, err_out: swsusp_free(); - return error; + return -ENOMEM; } asmlinkage int swsusp_save(void) diff --git a/kernel/printk.c b/kernel/printk.c index 2ddbdc73aad..36231525e22 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -262,25 +262,47 @@ int dmesg_restrict = 1; int dmesg_restrict; #endif +static int syslog_action_restricted(int type) +{ + if (dmesg_restrict) + return 1; + /* Unless restricted, we allow "read all" and "get buffer size" for everybody */ + return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER; +} + +static int check_syslog_permissions(int type, bool from_file) +{ + /* + * If this is from /proc/kmsg and we've already opened it, then we've + * already done the capabilities checks at open time. + */ + if (from_file && type != SYSLOG_ACTION_OPEN) + return 0; + + if (syslog_action_restricted(type)) { + if (capable(CAP_SYSLOG)) + return 0; + /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ + if (capable(CAP_SYS_ADMIN)) { + WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " + "but no CAP_SYSLOG (deprecated).\n"); + return 0; + } + return -EPERM; + } + return 0; +} + int do_syslog(int type, char __user *buf, int len, bool from_file) { unsigned i, j, limit, count; int do_clear = 0; char c; - int error = 0; + int error; - /* - * If this is from /proc/kmsg we only do the capabilities checks - * at open time. - */ - if (type == SYSLOG_ACTION_OPEN || !from_file) { - if (dmesg_restrict && !capable(CAP_SYSLOG)) - goto warn; /* switch to return -EPERM after 2.6.39 */ - if ((type != SYSLOG_ACTION_READ_ALL && - type != SYSLOG_ACTION_SIZE_BUFFER) && - !capable(CAP_SYSLOG)) - goto warn; /* switch to return -EPERM after 2.6.39 */ - } + error = check_syslog_permissions(type, from_file); + if (error) + goto out; error = security_syslog(type); if (error) @@ -423,12 +445,6 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) } out: return error; -warn: - /* remove after 2.6.39 */ - if (capable(CAP_SYS_ADMIN)) - WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " - "but no CAP_SYSLOG (deprecated and denied).\n"); - return -EPERM; } SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 99bbaa3e5b0..1708b1e2972 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -313,7 +313,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data) child->exit_code = data; dead = __ptrace_detach(current, child); if (!child->exit_state) - wake_up_process(child); + wake_up_state(child, TASK_TRACED | TASK_STOPPED); } write_unlock_irq(&tasklist_lock); diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index c914ec747ca..ad6267714c8 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -625,7 +625,7 @@ static void update_curr_rt(struct rq *rq) struct rt_rq *rt_rq = rt_rq_of_se(rt_se); u64 delta_exec; - if (!task_has_rt_policy(curr)) + if (curr->sched_class != &rt_sched_class) return; delta_exec = rq->clock_task - curr->se.exec_start; diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 32a19f9397f..3258455549f 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -41,7 +41,7 @@ static void print_name_offset(struct seq_file *m, void *sym) char symname[KSYM_NAME_LEN]; if (lookup_symbol_name((unsigned long)sym, symname) < 0) - SEQ_printf(m, "<%p>", sym); + SEQ_printf(m, "<%pK>", sym); else SEQ_printf(m, "%s", symname); } @@ -112,7 +112,7 @@ next_one: static void print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) { - SEQ_printf(m, " .base: %p\n", base); + SEQ_printf(m, " .base: %pK\n", base); SEQ_printf(m, " .index: %d\n", base->index); SEQ_printf(m, " .resolution: %Lu nsecs\n", diff --git a/kernel/timer.c b/kernel/timer.c index 43ca9936f2d..d6459923d24 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -959,7 +959,7 @@ EXPORT_SYMBOL(try_to_del_timer_sync); * * Synchronization rules: Callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from - * hardirq contexts. The caller must not hold locks which would prevent + * interrupt contexts. The caller must not hold locks which would prevent * completion of the timer's handler. The timer's handler must not call * add_timer_on(). Upon exit the timer is not queued and the handler is * not running on any CPU. @@ -969,10 +969,12 @@ EXPORT_SYMBOL(try_to_del_timer_sync); int del_timer_sync(struct timer_list *timer) { #ifdef CONFIG_LOCKDEP - local_bh_disable(); + unsigned long flags; + + local_irq_save(flags); lock_map_acquire(&timer->lockdep_map); lock_map_release(&timer->lockdep_map); - local_bh_enable(); + local_irq_restore(flags); #endif /* * don't use it in hardirq context, because it diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 153562d0b93..d95721f3370 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -138,6 +138,13 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) !blk_tracer_enabled)) return; + /* + * If the BLK_TC_NOTIFY action mask isn't set, don't send any note + * message to the trace. + */ + if (!(bt->act_mask & BLK_TC_NOTIFY)) + return; + local_irq_save(flags); buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); va_start(args, fmt); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 35fde09b81d..5f499e0438a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1284,7 +1284,7 @@ trace_create_file_ops(struct module *mod) static void trace_module_add_events(struct module *mod) { struct ftrace_module_file_ops *file_ops = NULL; - struct ftrace_event_call *call, *start, *end; + struct ftrace_event_call **call, **start, **end; start = mod->trace_events; end = mod->trace_events + mod->num_trace_events; @@ -1297,7 +1297,7 @@ static void trace_module_add_events(struct module *mod) return; for_each_event(call, start, end) { - __trace_add_event_call(call, mod, + __trace_add_event_call(*call, mod, &file_ops->id, &file_ops->enable, &file_ops->filter, &file_ops->format); } @@ -1367,8 +1367,8 @@ static struct notifier_block trace_module_nb = { .priority = 0, }; -extern struct ftrace_event_call __start_ftrace_events[]; -extern struct ftrace_event_call __stop_ftrace_events[]; +extern struct ftrace_event_call *__start_ftrace_events[]; +extern struct ftrace_event_call *__stop_ftrace_events[]; static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; @@ -1384,7 +1384,7 @@ __setup("trace_event=", setup_trace_event); static __init int event_trace_init(void) { - struct ftrace_event_call *call; + struct ftrace_event_call **call; struct dentry *d_tracer; struct dentry *entry; struct dentry *d_events; @@ -1430,7 +1430,7 @@ static __init int event_trace_init(void) pr_warning("tracing: Failed to allocate common fields"); for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { - __trace_add_event_call(call, NULL, &ftrace_event_id_fops, + __trace_add_event_call(*call, NULL, &ftrace_event_id_fops, &ftrace_enable_fops, &ftrace_event_filter_fops, &ftrace_event_format_fops); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4b74d71705c..bbeec31e0ae 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -161,13 +161,13 @@ struct ftrace_event_class event_class_ftrace_##call = { \ .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ }; \ \ -struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ +struct ftrace_event_call __used event_##call = { \ .name = #call, \ .event.type = etype, \ .class = &event_class_ftrace_##call, \ .print_fmt = print, \ }; \ +struct ftrace_event_call __used \ +__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; #include "trace_entries.h" diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index b706529b4fc..5c9fe08d209 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -55,20 +55,21 @@ struct ftrace_event_class event_class_syscall_exit = { .raw_init = init_syscall_trace, }; -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; +extern struct syscall_metadata *__start_syscalls_metadata[]; +extern struct syscall_metadata *__stop_syscalls_metadata[]; static struct syscall_metadata **syscalls_metadata; -static struct syscall_metadata *find_syscall_meta(unsigned long syscall) +static __init struct syscall_metadata * +find_syscall_meta(unsigned long syscall) { - struct syscall_metadata *start; - struct syscall_metadata *stop; + struct syscall_metadata **start; + struct syscall_metadata **stop; char str[KSYM_SYMBOL_LEN]; - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; + start = __start_syscalls_metadata; + stop = __stop_syscalls_metadata; kallsyms_lookup(syscall, NULL, NULL, NULL, str); for ( ; start < stop; start++) { @@ -78,8 +79,8 @@ static struct syscall_metadata *find_syscall_meta(unsigned long syscall) * with "SyS" instead of "sys", leading to an unwanted * mismatch. */ - if (start->name && !strcmp(start->name + 3, str + 3)) - return start; + if ((*start)->name && !strcmp((*start)->name + 3, str + 3)) + return *start; } return NULL; } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index e95ee7f31d4..68187af4889 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -27,8 +27,8 @@ #include <linux/sched.h> #include <linux/jump_label.h> -extern struct tracepoint __start___tracepoints[]; -extern struct tracepoint __stop___tracepoints[]; +extern struct tracepoint * const __start___tracepoints_ptrs[]; +extern struct tracepoint * const __stop___tracepoints_ptrs[]; /* Set to 1 to enable tracepoint debug output */ static const int tracepoint_debug; @@ -298,10 +298,10 @@ static void disable_tracepoint(struct tracepoint *elem) * * Updates the probe callback corresponding to a range of tracepoints. */ -void -tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) +void tracepoint_update_probe_range(struct tracepoint * const *begin, + struct tracepoint * const *end) { - struct tracepoint *iter; + struct tracepoint * const *iter; struct tracepoint_entry *mark_entry; if (!begin) @@ -309,12 +309,12 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) mutex_lock(&tracepoints_mutex); for (iter = begin; iter < end; iter++) { - mark_entry = get_tracepoint(iter->name); + mark_entry = get_tracepoint((*iter)->name); if (mark_entry) { - set_tracepoint(&mark_entry, iter, + set_tracepoint(&mark_entry, *iter, !!mark_entry->refcount); } else { - disable_tracepoint(iter); + disable_tracepoint(*iter); } } mutex_unlock(&tracepoints_mutex); @@ -326,8 +326,8 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) static void tracepoint_update_probes(void) { /* Core kernel tracepoints */ - tracepoint_update_probe_range(__start___tracepoints, - __stop___tracepoints); + tracepoint_update_probe_range(__start___tracepoints_ptrs, + __stop___tracepoints_ptrs); /* tracepoints in modules. */ module_update_tracepoints(); } @@ -514,8 +514,8 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); * Will return the first tracepoint in the range if the input tracepoint is * NULL. */ -int tracepoint_get_iter_range(struct tracepoint **tracepoint, - struct tracepoint *begin, struct tracepoint *end) +int tracepoint_get_iter_range(struct tracepoint * const **tracepoint, + struct tracepoint * const *begin, struct tracepoint * const *end) { if (!*tracepoint && begin != end) { *tracepoint = begin; @@ -534,7 +534,8 @@ static void tracepoint_get_iter(struct tracepoint_iter *iter) /* Core kernel tracepoints */ if (!iter->module) { found = tracepoint_get_iter_range(&iter->tracepoint, - __start___tracepoints, __stop___tracepoints); + __start___tracepoints_ptrs, + __stop___tracepoints_ptrs); if (found) goto end; } @@ -585,8 +586,8 @@ int tracepoint_module_notify(struct notifier_block *self, switch (val) { case MODULE_STATE_COMING: case MODULE_STATE_GOING: - tracepoint_update_probe_range(mod->tracepoints, - mod->tracepoints + mod->num_tracepoints); + tracepoint_update_probe_range(mod->tracepoints_ptrs, + mod->tracepoints_ptrs + mod->num_tracepoints); break; } return 0; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d7ebdf4cea9..18bb15776c5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -27,7 +27,7 @@ #include <asm/irq_regs.h> #include <linux/perf_event.h> -int watchdog_enabled; +int watchdog_enabled = 1; int __read_mostly softlockup_thresh = 60; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -43,9 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif -static int no_watchdog; - - /* boot commands */ /* * Should we panic when a soft-lockup or hard-lockup occurs: @@ -58,7 +55,7 @@ static int __init hardlockup_panic_setup(char *str) if (!strncmp(str, "panic", 5)) hardlockup_panic = 1; else if (!strncmp(str, "0", 1)) - no_watchdog = 1; + watchdog_enabled = 0; return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); @@ -77,7 +74,7 @@ __setup("softlockup_panic=", softlockup_panic_setup); static int __init nowatchdog_setup(char *str) { - no_watchdog = 1; + watchdog_enabled = 0; return 1; } __setup("nowatchdog", nowatchdog_setup); @@ -85,7 +82,7 @@ __setup("nowatchdog", nowatchdog_setup); /* deprecated */ static int __init nosoftlockup_setup(char *str) { - no_watchdog = 1; + watchdog_enabled = 0; return 1; } __setup("nosoftlockup", nosoftlockup_setup); @@ -366,8 +363,14 @@ static int watchdog_nmi_enable(int cpu) goto out_save; } - printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n", - cpu, PTR_ERR(event)); + + /* vary the KERN level based on the returned errno */ + if (PTR_ERR(event) == -EOPNOTSUPP) + printk(KERN_INFO "NMI watchdog disabled (cpu%i): not supported (no LAPIC?)\n", cpu); + else if (PTR_ERR(event) == -ENOENT) + printk(KERN_WARNING "NMI watchdog disabled (cpu%i): hardware events not enabled\n", cpu); + else + printk(KERN_ERR "NMI watchdog disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event)); return PTR_ERR(event); /* success path */ @@ -432,9 +435,6 @@ static int watchdog_enable(int cpu) wake_up_process(p); } - /* if any cpu succeeds, watchdog is considered enabled for the system */ - watchdog_enabled = 1; - return 0; } @@ -462,12 +462,16 @@ static void watchdog_disable(int cpu) static void watchdog_enable_all_cpus(void) { int cpu; - int result = 0; + + watchdog_enabled = 0; for_each_online_cpu(cpu) - result += watchdog_enable(cpu); + if (!watchdog_enable(cpu)) + /* if any cpu succeeds, watchdog is considered + enabled for the system */ + watchdog_enabled = 1; - if (result) + if (!watchdog_enabled) printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); } @@ -476,9 +480,6 @@ static void watchdog_disable_all_cpus(void) { int cpu; - if (no_watchdog) - return; - for_each_online_cpu(cpu) watchdog_disable(cpu); @@ -498,10 +499,12 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write, { proc_dointvec(table, write, buffer, length, ppos); - if (watchdog_enabled) - watchdog_enable_all_cpus(); - else - watchdog_disable_all_cpus(); + if (write) { + if (watchdog_enabled) + watchdog_enable_all_cpus(); + else + watchdog_disable_all_cpus(); + } return 0; } @@ -530,7 +533,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: - err = watchdog_enable(hotcpu); + if (watchdog_enabled) + err = watchdog_enable(hotcpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: @@ -555,9 +559,6 @@ void __init lockup_detector_init(void) void *cpu = (void *)(long)smp_processor_id(); int err; - if (no_watchdog) - return; - err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); WARN_ON(notifier_to_errno(err)); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 11869faa681..ee6578b578a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -79,7 +79,9 @@ enum { MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ - MAYDAY_INITIAL_TIMEOUT = HZ / 100, /* call for help after 10ms */ + MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2, + /* call for help after 10ms + (min two ticks) */ MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ CREATE_COOLDOWN = HZ, /* time to breath after fail */ TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */ @@ -2047,6 +2049,15 @@ repeat: move_linked_works(work, scheduled, &n); process_scheduled_works(rescuer); + + /* + * Leave this gcwq. If keep_working() is %true, notify a + * regular worker; otherwise, we end up with 0 concurrency + * and stalling the execution. + */ + if (keep_working(gcwq)) + wake_up_worker(gcwq); + spin_unlock_irq(&gcwq->lock); } @@ -2956,7 +2967,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, */ spin_lock(&workqueue_lock); - if (workqueue_freezing && wq->flags & WQ_FREEZEABLE) + if (workqueue_freezing && wq->flags & WQ_FREEZABLE) for_each_cwq_cpu(cpu, wq) get_cwq(cpu, wq)->max_active = 0; @@ -3068,7 +3079,7 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) spin_lock_irq(&gcwq->lock); - if (!(wq->flags & WQ_FREEZEABLE) || + if (!(wq->flags & WQ_FREEZABLE) || !(gcwq->flags & GCWQ_FREEZING)) get_cwq(gcwq->cpu, wq)->max_active = max_active; @@ -3318,7 +3329,7 @@ static int __cpuinit trustee_thread(void *__gcwq) * want to get it over with ASAP - spam rescuers, wake up as * many idlers as necessary and create new ones till the * worklist is empty. Note that if the gcwq is frozen, there - * may be frozen works in freezeable cwqs. Don't declare + * may be frozen works in freezable cwqs. Don't declare * completion while frozen. */ while (gcwq->nr_workers != gcwq->nr_idle || @@ -3576,9 +3587,9 @@ EXPORT_SYMBOL_GPL(work_on_cpu); /** * freeze_workqueues_begin - begin freezing workqueues * - * Start freezing workqueues. After this function returns, all - * freezeable workqueues will queue new works to their frozen_works - * list instead of gcwq->worklist. + * Start freezing workqueues. After this function returns, all freezable + * workqueues will queue new works to their frozen_works list instead of + * gcwq->worklist. * * CONTEXT: * Grabs and releases workqueue_lock and gcwq->lock's. @@ -3604,7 +3615,7 @@ void freeze_workqueues_begin(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (cwq && wq->flags & WQ_FREEZEABLE) + if (cwq && wq->flags & WQ_FREEZABLE) cwq->max_active = 0; } @@ -3615,7 +3626,7 @@ void freeze_workqueues_begin(void) } /** - * freeze_workqueues_busy - are freezeable workqueues still busy? + * freeze_workqueues_busy - are freezable workqueues still busy? * * Check whether freezing is complete. This function must be called * between freeze_workqueues_begin() and thaw_workqueues(). @@ -3624,8 +3635,8 @@ void freeze_workqueues_begin(void) * Grabs and releases workqueue_lock. * * RETURNS: - * %true if some freezeable workqueues are still busy. %false if - * freezing is complete. + * %true if some freezable workqueues are still busy. %false if freezing + * is complete. */ bool freeze_workqueues_busy(void) { @@ -3645,7 +3656,7 @@ bool freeze_workqueues_busy(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZABLE)) continue; BUG_ON(cwq->nr_active < 0); @@ -3690,7 +3701,7 @@ void thaw_workqueues(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZABLE)) continue; /* restore max_active and repopulate worklist */ |