diff options
Diffstat (limited to 'kernel')
54 files changed, 872 insertions, 522 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 5068e2a4e75..2251882daf5 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -124,8 +124,8 @@ config INLINE_SPIN_LOCK_IRQSAVE def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ ARCH_INLINE_SPIN_LOCK_IRQSAVE -config INLINE_SPIN_UNLOCK - def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK) +config UNINLINE_SPIN_UNLOCK + bool config INLINE_SPIN_UNLOCK_BH def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 24e7cb0ba26..3f9c97419f0 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -36,6 +36,7 @@ config PREEMPT_VOLUNTARY config PREEMPT bool "Preemptible Kernel (Low-Latency Desktop)" select PREEMPT_COUNT + select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK help This option reduces the latency of the kernel by making all kernel code (that is not executing in a critical section) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f4ea4b6f3cf..ed64ccac67c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1883,7 +1883,7 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, */ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { - int retval; + int retval = 0; struct cgroup_subsys *ss, *failed_ss = NULL; struct cgroup *oldcgrp; struct cgroupfs_root *root = cgrp->root; diff --git a/kernel/compat.c b/kernel/compat.c index f346cedfe24..74ff8498809 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -31,11 +31,10 @@ #include <asm/uaccess.h> /* - * Note that the native side is already converted to a timespec, because - * that's what we want anyway. + * Get/set struct timeval with struct timespec on the native side */ -static int compat_get_timeval(struct timespec *o, - struct compat_timeval __user *i) +static int compat_get_timeval_convert(struct timespec *o, + struct compat_timeval __user *i) { long usec; @@ -46,8 +45,8 @@ static int compat_get_timeval(struct timespec *o, return 0; } -static int compat_put_timeval(struct compat_timeval __user *o, - struct timeval *i) +static int compat_put_timeval_convert(struct compat_timeval __user *o, + struct timeval *i) { return (put_user(i->tv_sec, &o->tv_sec) || put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0; @@ -117,7 +116,7 @@ asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, if (tv) { struct timeval ktv; do_gettimeofday(&ktv); - if (compat_put_timeval(tv, &ktv)) + if (compat_put_timeval_convert(tv, &ktv)) return -EFAULT; } if (tz) { @@ -135,7 +134,7 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, struct timezone ktz; if (tv) { - if (compat_get_timeval(&kts, tv)) + if (compat_get_timeval_convert(&kts, tv)) return -EFAULT; } if (tz) { @@ -146,12 +145,29 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } +int get_compat_timeval(struct timeval *tv, const struct compat_timeval __user *ctv) +{ + return (!access_ok(VERIFY_READ, ctv, sizeof(*ctv)) || + __get_user(tv->tv_sec, &ctv->tv_sec) || + __get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; +} +EXPORT_SYMBOL_GPL(get_compat_timeval); + +int put_compat_timeval(const struct timeval *tv, struct compat_timeval __user *ctv) +{ + return (!access_ok(VERIFY_WRITE, ctv, sizeof(*ctv)) || + __put_user(tv->tv_sec, &ctv->tv_sec) || + __put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; +} +EXPORT_SYMBOL_GPL(put_compat_timeval); + int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) { return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) || __get_user(ts->tv_sec, &cts->tv_sec) || __get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; } +EXPORT_SYMBOL_GPL(get_compat_timespec); int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user *cts) { @@ -161,6 +177,42 @@ int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user } EXPORT_SYMBOL_GPL(put_compat_timespec); +int compat_get_timeval(struct timeval *tv, const void __user *utv) +{ + if (COMPAT_USE_64BIT_TIME) + return copy_from_user(tv, utv, sizeof *tv) ? -EFAULT : 0; + else + return get_compat_timeval(tv, utv); +} +EXPORT_SYMBOL_GPL(compat_get_timeval); + +int compat_put_timeval(const struct timeval *tv, void __user *utv) +{ + if (COMPAT_USE_64BIT_TIME) + return copy_to_user(utv, tv, sizeof *tv) ? -EFAULT : 0; + else + return put_compat_timeval(tv, utv); +} +EXPORT_SYMBOL_GPL(compat_put_timeval); + +int compat_get_timespec(struct timespec *ts, const void __user *uts) +{ + if (COMPAT_USE_64BIT_TIME) + return copy_from_user(ts, uts, sizeof *ts) ? -EFAULT : 0; + else + return get_compat_timespec(ts, uts); +} +EXPORT_SYMBOL_GPL(compat_get_timespec); + +int compat_put_timespec(const struct timespec *ts, void __user *uts) +{ + if (COMPAT_USE_64BIT_TIME) + return copy_to_user(uts, ts, sizeof *ts) ? -EFAULT : 0; + else + return put_compat_timespec(ts, uts); +} +EXPORT_SYMBOL_GPL(compat_put_timespec); + static long compat_nanosleep_restart(struct restart_block *restart) { struct compat_timespec __user *rmtp; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1010cc61931..14f7070b4ba 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -270,11 +270,11 @@ static struct file_system_type cpuset_fs_type = { * are online. If none are online, walk up the cpuset hierarchy * until we find one that does have some online cpus. If we get * all the way to the top and still haven't found any online cpus, - * return cpu_online_map. Or if passed a NULL cs from an exit'ing - * task, return cpu_online_map. + * return cpu_online_mask. Or if passed a NULL cs from an exit'ing + * task, return cpu_online_mask. * * One way or another, we guarantee to return some non-empty subset - * of cpu_online_map. + * of cpu_online_mask. * * Call with callback_mutex held. */ @@ -867,7 +867,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, int retval; int is_load_balanced; - /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ + /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ if (cs == &top_cpuset) return -EACCES; @@ -2149,7 +2149,7 @@ void __init cpuset_init_smp(void) * * Description: Returns the cpumask_var_t cpus_allowed of the cpuset * attached to the specified @tsk. Guaranteed to return some non-empty - * subset of cpu_online_map, even if this means going outside the + * subset of cpu_online_mask, even if this means going outside the * tasks cpuset. **/ @@ -2162,10 +2162,9 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) mutex_unlock(&callback_mutex); } -int cpuset_cpus_allowed_fallback(struct task_struct *tsk) +void cpuset_cpus_allowed_fallback(struct task_struct *tsk) { const struct cpuset *cs; - int cpu; rcu_read_lock(); cs = task_cs(tsk); @@ -2186,22 +2185,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary * set any mask even if it is not right from task_cs() pov, * the pending set_cpus_allowed_ptr() will fix things. + * + * select_fallback_rq() will fix things ups and set cpu_possible_mask + * if required. */ - - cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask); - if (cpu >= nr_cpu_ids) { - /* - * Either tsk->cpus_allowed is wrong (see above) or it - * is actually empty. The latter case is only possible - * if we are racing with remove_tasks_in_empty_cpuset(). - * Like above we can temporary set any mask and rely on - * set_cpus_allowed_ptr() as synchronization point. - */ - do_set_cpus_allowed(tsk, cpu_possible_mask); - cpu = cpumask_any(cpu_active_mask); - } - - return cpu; } void cpuset_init_current_mems_allowed(void) diff --git a/kernel/cred.c b/kernel/cred.c index 97b36eeca4c..e70683d9ec3 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -386,6 +386,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct cred *new; int ret; + p->replacement_session_keyring = NULL; + if ( #ifdef CONFIG_KEYS !p->cred->thread_keyring && diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 3f88a45e6f0..0557f24c6bc 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -53,7 +53,6 @@ #include <asm/cacheflush.h> #include <asm/byteorder.h> #include <linux/atomic.h> -#include <asm/system.h> #include "debug_core.h" @@ -161,37 +160,39 @@ early_param("nokgdbroundup", opt_nokgdbroundup); * Weak aliases for breakpoint management, * can be overriden by architectures when needed: */ -int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr) +int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; - err = probe_kernel_read(saved_instr, (char *)addr, BREAK_INSTR_SIZE); + err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + BREAK_INSTR_SIZE); if (err) return err; - - return probe_kernel_write((char *)addr, arch_kgdb_ops.gdb_bpt_instr, - BREAK_INSTR_SIZE); + err = probe_kernel_write((char *)bpt->bpt_addr, + arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); + return err; } -int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle) +int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { - return probe_kernel_write((char *)addr, - (char *)bundle, BREAK_INSTR_SIZE); + return probe_kernel_write((char *)bpt->bpt_addr, + (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } int __weak kgdb_validate_break_address(unsigned long addr) { - char tmp_variable[BREAK_INSTR_SIZE]; + struct kgdb_bkpt tmp; int err; - /* Validate setting the breakpoint and then removing it. In the + /* Validate setting the breakpoint and then removing it. If the * remove fails, the kernel needs to emit a bad message because we * are deep trouble not being able to put things back the way we * found them. */ - err = kgdb_arch_set_breakpoint(addr, tmp_variable); + tmp.bpt_addr = addr; + err = kgdb_arch_set_breakpoint(&tmp); if (err) return err; - err = kgdb_arch_remove_breakpoint(addr, tmp_variable); + err = kgdb_arch_remove_breakpoint(&tmp); if (err) printk(KERN_ERR "KGDB: Critical breakpoint error, kernel " "memory destroyed at: %lx", addr); @@ -235,7 +236,6 @@ static void kgdb_flush_swbreak_addr(unsigned long addr) */ int dbg_activate_sw_breakpoints(void) { - unsigned long addr; int error; int ret = 0; int i; @@ -244,16 +244,15 @@ int dbg_activate_sw_breakpoints(void) if (kgdb_break[i].state != BP_SET) continue; - addr = kgdb_break[i].bpt_addr; - error = kgdb_arch_set_breakpoint(addr, - kgdb_break[i].saved_instr); + error = kgdb_arch_set_breakpoint(&kgdb_break[i]); if (error) { ret = error; - printk(KERN_INFO "KGDB: BP install failed: %lx", addr); + printk(KERN_INFO "KGDB: BP install failed: %lx", + kgdb_break[i].bpt_addr); continue; } - kgdb_flush_swbreak_addr(addr); + kgdb_flush_swbreak_addr(kgdb_break[i].bpt_addr); kgdb_break[i].state = BP_ACTIVE; } return ret; @@ -302,7 +301,6 @@ int dbg_set_sw_break(unsigned long addr) int dbg_deactivate_sw_breakpoints(void) { - unsigned long addr; int error; int ret = 0; int i; @@ -310,15 +308,14 @@ int dbg_deactivate_sw_breakpoints(void) for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { if (kgdb_break[i].state != BP_ACTIVE) continue; - addr = kgdb_break[i].bpt_addr; - error = kgdb_arch_remove_breakpoint(addr, - kgdb_break[i].saved_instr); + error = kgdb_arch_remove_breakpoint(&kgdb_break[i]); if (error) { - printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr); + printk(KERN_INFO "KGDB: BP remove failed: %lx\n", + kgdb_break[i].bpt_addr); ret = error; } - kgdb_flush_swbreak_addr(addr); + kgdb_flush_swbreak_addr(kgdb_break[i].bpt_addr); kgdb_break[i].state = BP_SET; } return ret; @@ -352,7 +349,6 @@ int kgdb_isremovedbreak(unsigned long addr) int dbg_remove_all_break(void) { - unsigned long addr; int error; int i; @@ -360,12 +356,10 @@ int dbg_remove_all_break(void) for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { if (kgdb_break[i].state != BP_ACTIVE) goto setundefined; - addr = kgdb_break[i].bpt_addr; - error = kgdb_arch_remove_breakpoint(addr, - kgdb_break[i].saved_instr); + error = kgdb_arch_remove_breakpoint(&kgdb_break[i]); if (error) printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n", - addr); + kgdb_break[i].bpt_addr); setundefined: kgdb_break[i].state = BP_UNDEFINED; } diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 7179eac7b41..07c9bbb94a0 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -15,7 +15,6 @@ #include <linux/sched.h> #include <linux/kdb.h> #include <linux/nmi.h> -#include <asm/system.h> #include "kdb_private.h" diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 9b5f17da1c5..bb9520f0f6f 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -743,7 +743,7 @@ kdb_printit: kdb_input_flush(); c = console_drivers; - if (!dbg_io_ops->is_console) { + if (dbg_io_ops && !dbg_io_ops->is_console) { len = strlen(moreprompt); cp = moreprompt; while (len--) { diff --git a/kernel/dma.c b/kernel/dma.c index 68a2306522c..6c6262f86c1 100644 --- a/kernel/dma.c +++ b/kernel/dma.c @@ -18,7 +18,6 @@ #include <linux/proc_fs.h> #include <linux/init.h> #include <asm/dma.h> -#include <asm/system.h> diff --git a/kernel/events/core.c b/kernel/events/core.c index 4b50357914f..a6a9ec4cd8f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3348,7 +3348,7 @@ static void calc_timer_values(struct perf_event *event, *running = ctx_time - event->tstamp_running; } -void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) +void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { } @@ -3398,7 +3398,7 @@ void perf_event_update_userpage(struct perf_event *event) userpg->time_running = running + atomic64_read(&event->child_total_time_running); - perf_update_user_clock(userpg, now); + arch_perf_update_userpage(userpg, now); barrier(); ++userpg->lock; @@ -7116,6 +7116,13 @@ void __init perf_event_init(void) /* do not patch jump label more than once per second */ jump_label_rate_limit(&perf_sched_events, HZ); + + /* + * Build time assertion that we keep the data_head at the intended + * location. IOW, validation we got the __reserved[] size right. + */ + BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head)) + != 1024); } static int __init perf_event_sysfs_init(void) diff --git a/kernel/exit.c b/kernel/exit.c index 3db1909faed..d8bd3b425fa 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -474,7 +474,7 @@ static void close_files(struct files_struct * files) i = j * __NFDBITS; if (i >= fdt->max_fds) break; - set = fdt->open_fds->fds_bits[j++]; + set = fdt->open_fds[j++]; while (set) { if (set & 1) { struct file * file = xchg(&fdt->fd[i], NULL); diff --git a/kernel/futex.c b/kernel/futex.c index 72efa1e4359..e2b0fb9a0b3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -59,6 +59,7 @@ #include <linux/magic.h> #include <linux/pid.h> #include <linux/nsproxy.h> +#include <linux/ptrace.h> #include <asm/futex.h> @@ -2443,40 +2444,31 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, { struct robust_list_head __user *head; unsigned long ret; - const struct cred *cred = current_cred(), *pcred; + struct task_struct *p; if (!futex_cmpxchg_enabled) return -ENOSYS; + WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); + + rcu_read_lock(); + + ret = -ESRCH; if (!pid) - head = current->robust_list; + p = current; else { - struct task_struct *p; - - ret = -ESRCH; - rcu_read_lock(); p = find_task_by_vpid(pid); if (!p) goto err_unlock; - ret = -EPERM; - pcred = __task_cred(p); - /* If victim is in different user_ns, then uids are not - comparable, so we must have CAP_SYS_PTRACE */ - if (cred->user->user_ns != pcred->user->user_ns) { - if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; - goto ok; - } - /* If victim is in same user_ns, then uids are comparable */ - if (cred->euid != pcred->euid && - cred->euid != pcred->uid && - !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; -ok: - head = p->robust_list; - rcu_read_unlock(); } + ret = -EPERM; + if (!ptrace_may_access(p, PTRACE_MODE_READ)) + goto err_unlock; + + head = p->robust_list; + rcu_read_unlock(); + if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(head, head_ptr); diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 5f9e689dc8f..83e368b005f 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -10,6 +10,7 @@ #include <linux/compat.h> #include <linux/nsproxy.h> #include <linux/futex.h> +#include <linux/ptrace.h> #include <asm/uaccess.h> @@ -136,40 +137,31 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, { struct compat_robust_list_head __user *head; unsigned long ret; - const struct cred *cred = current_cred(), *pcred; + struct task_struct *p; if (!futex_cmpxchg_enabled) return -ENOSYS; + WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); + + rcu_read_lock(); + + ret = -ESRCH; if (!pid) - head = current->compat_robust_list; + p = current; else { - struct task_struct *p; - - ret = -ESRCH; - rcu_read_lock(); p = find_task_by_vpid(pid); if (!p) goto err_unlock; - ret = -EPERM; - pcred = __task_cred(p); - /* If victim is in different user_ns, then uids are not - comparable, so we must have CAP_SYS_PTRACE */ - if (cred->user->user_ns != pcred->user->user_ns) { - if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; - goto ok; - } - /* If victim is in same user_ns, then uids are comparable */ - if (cred->euid != pcred->euid && - cred->euid != pcred->uid && - !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; -ok: - head = p->compat_robust_list; - rcu_read_unlock(); } + ret = -EPERM; + if (!ptrace_may_access(p, PTRACE_MODE_READ)) + goto err_unlock; + + head = p->compat_robust_list; + rcu_read_unlock(); + if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(ptr_to_compat(head), head_ptr); diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 5a38bf4de64..cf1a4a68ce4 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -13,7 +13,7 @@ config GENERIC_HARDIRQS # Options selectable by the architecture code # Make sparse irq Kconfig switch below available -config HAVE_SPARSE_IRQ +config MAY_HAVE_SPARSE_IRQ bool # Enable the generic irq autoprobe mechanism @@ -56,13 +56,22 @@ config GENERIC_IRQ_CHIP config IRQ_DOMAIN bool +config IRQ_DOMAIN_DEBUG + bool "Expose hardware/virtual IRQ mapping via debugfs" + depends on IRQ_DOMAIN && DEBUG_FS + help + This option will show the mapping relationship between hardware irq + numbers and Linux irq numbers. The mapping is exposed via debugfs + in the file "virq_mapping". + + If you don't know what this means you don't need it. + # Support forced irq threading config IRQ_FORCED_THREADING bool config SPARSE_IRQ - bool "Support sparse irq numbering" - depends on HAVE_SPARSE_IRQ + bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ ---help--- Sparse irq numbering is useful for distro kernels that want diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 6ff84e6a954..bdb18032555 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -54,14 +54,18 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action) static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action) { /* - * Wake up the handler thread for this action. In case the - * thread crashed and was killed we just pretend that we - * handled the interrupt. The hardirq handler has disabled the - * device interrupt, so no irq storm is lurking. If the + * In case the thread crashed and was killed we just pretend that + * we handled the interrupt. The hardirq handler has disabled the + * device interrupt, so no irq storm is lurking. + */ + if (action->thread->flags & PF_EXITING) + return; + + /* + * Wake up the handler thread for this action. If the * RUNTHREAD bit is already set, nothing to do. */ - if ((action->thread->flags & PF_EXITING) || - test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags)) + if (test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags)) return; /* diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index af48e59bc2f..3601f3fbf67 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -632,7 +632,7 @@ unsigned int irq_linear_revmap(struct irq_domain *domain, return revmap[hwirq]; } -#ifdef CONFIG_VIRQ_DEBUG +#ifdef CONFIG_IRQ_DOMAIN_DEBUG static int virq_debug_show(struct seq_file *m, void *private) { unsigned long flags; @@ -668,7 +668,7 @@ static int virq_debug_show(struct seq_file *m, void *private) data = irq_desc_get_chip_data(desc); seq_printf(m, "0x%16p ", data); - if (desc->irq_data.domain->of_node) + if (desc->irq_data.domain && desc->irq_data.domain->of_node) p = desc->irq_data.domain->of_node->full_name; else p = none; @@ -695,14 +695,14 @@ static const struct file_operations virq_debug_fops = { static int __init irq_debugfs_init(void) { - if (debugfs_create_file("virq_mapping", S_IRUGO, powerpc_debugfs_root, + if (debugfs_create_file("irq_domain_mapping", S_IRUGO, NULL, NULL, &virq_debug_fops) == NULL) return -ENOMEM; return 0; } __initcall(irq_debugfs_init); -#endif /* CONFIG_VIRQ_DEBUG */ +#endif /* CONFIG_IRQ_DOMAIN_DEBUG */ int irq_domain_simple_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index b0ccd1ac2d6..89a3ea82569 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -282,7 +282,7 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) { struct irq_chip *chip = irq_desc_get_chip(desc); struct cpumask *set = irq_default_affinity; - int ret; + int ret, node = desc->irq_data.node; /* Excludes PER_CPU and NO_BALANCE interrupts */ if (!irq_can_set_affinity(irq)) @@ -301,6 +301,13 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) } cpumask_and(mask, cpu_online_mask, set); + if (node != NUMA_NO_NODE) { + const struct cpumask *nodemask = cpumask_of_node(node); + + /* make sure at least one of the cpus in nodemask is online */ + if (cpumask_intersects(mask, nodemask)) + cpumask_and(mask, mask, nodemask); + } ret = chip->irq_set_affinity(&desc->irq_data, mask, false); switch (ret) { case IRQ_SET_MASK_OK: @@ -645,7 +652,7 @@ static int irq_wait_for_interrupt(struct irqaction *action) * is marked MASKED. */ static void irq_finalize_oneshot(struct irq_desc *desc, - struct irqaction *action, bool force) + struct irqaction *action) { if (!(desc->istate & IRQS_ONESHOT)) return; @@ -679,7 +686,7 @@ again: * we would clear the threads_oneshot bit of this thread which * was just set. */ - if (!force && test_bit(IRQTF_RUNTHREAD, &action->thread_flags)) + if (test_bit(IRQTF_RUNTHREAD, &action->thread_flags)) goto out_unlock; desc->threads_oneshot &= ~action->thread_mask; @@ -739,7 +746,7 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) local_bh_disable(); ret = action->thread_fn(action->irq, action->dev_id); - irq_finalize_oneshot(desc, action, false); + irq_finalize_oneshot(desc, action); local_bh_enable(); return ret; } @@ -755,7 +762,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc, irqreturn_t ret; ret = action->thread_fn(action->irq, action->dev_id); - irq_finalize_oneshot(desc, action, false); + irq_finalize_oneshot(desc, action); return ret; } @@ -844,7 +851,7 @@ void exit_irq_thread(void) wake_threads_waitq(desc); /* Prevent a stale desc->threads_oneshot */ - irq_finalize_oneshot(desc, action, true); + irq_finalize_oneshot(desc, action); } static void irq_setup_forced_threading(struct irqaction *new) diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 47420908fba..c3c89751b32 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -43,12 +43,16 @@ void irq_move_masked_irq(struct irq_data *idata) * masking the irqs. */ if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) - < nr_cpu_ids)) - if (!chip->irq_set_affinity(&desc->irq_data, - desc->pending_mask, false)) { + < nr_cpu_ids)) { + int ret = chip->irq_set_affinity(&desc->irq_data, + desc->pending_mask, false); + switch (ret) { + case IRQ_SET_MASK_OK: cpumask_copy(desc->irq_data.affinity, desc->pending_mask); + case IRQ_SET_MASK_OK_NOCOPY: irq_set_thread_affinity(desc); } + } cpumask_clear(desc->pending_mask); } diff --git a/kernel/irq_work.c b/kernel/irq_work.c index c3c46c72046..0c56d44b9fd 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -5,6 +5,7 @@ * context. The enqueueing is NMI-safe. */ +#include <linux/bug.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/irq_work.h> diff --git a/kernel/kexec.c b/kernel/kexec.c index a6a675cb981..4e2e472f6ae 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -37,7 +37,6 @@ #include <asm/page.h> #include <asm/uaccess.h> #include <asm/io.h> -#include <asm/system.h> #include <asm/sections.h> /* Per cpu memory for storing cpu states in case of system crash. */ @@ -1359,6 +1358,10 @@ static int __init parse_crashkernel_simple(char *cmdline, if (*cur == '@') *crash_base = memparse(cur+1, &cur); + else if (*cur != ' ' && *cur != '\0') { + pr_warning("crashkernel: unrecognized char\n"); + return -EINVAL; + } return 0; } @@ -1462,7 +1465,9 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_SYMBOL(init_uts_ns); VMCOREINFO_SYMBOL(node_online_map); +#ifdef CONFIG_MMU VMCOREINFO_SYMBOL(swapper_pg_dir); +#endif VMCOREINFO_SYMBOL(_stext); VMCOREINFO_SYMBOL(vmlist); diff --git a/kernel/kmod.c b/kernel/kmod.c index 957a7aab8eb..05698a7415f 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -322,7 +322,7 @@ static void __call_usermodehelper(struct work_struct *work) * land has been frozen during a system-wide hibernation or suspend operation). * Should always be manipulated under umhelper_sem acquired for write. */ -static int usermodehelper_disabled = 1; +static enum umh_disable_depth usermodehelper_disabled = UMH_DISABLED; /* Number of helpers running */ static atomic_t running_helpers = ATOMIC_INIT(0); @@ -334,32 +334,110 @@ static atomic_t running_helpers = ATOMIC_INIT(0); static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); /* + * Used by usermodehelper_read_lock_wait() to wait for usermodehelper_disabled + * to become 'false'. + */ +static DECLARE_WAIT_QUEUE_HEAD(usermodehelper_disabled_waitq); + +/* * Time to wait for running_helpers to become zero before the setting of * usermodehelper_disabled in usermodehelper_disable() fails */ #define RUNNING_HELPERS_TIMEOUT (5 * HZ) -void read_lock_usermodehelper(void) +int usermodehelper_read_trylock(void) { + DEFINE_WAIT(wait); + int ret = 0; + down_read(&umhelper_sem); + for (;;) { + prepare_to_wait(&usermodehelper_disabled_waitq, &wait, + TASK_INTERRUPTIBLE); + if (!usermodehelper_disabled) + break; + + if (usermodehelper_disabled == UMH_DISABLED) + ret = -EAGAIN; + + up_read(&umhelper_sem); + + if (ret) + break; + + schedule(); + try_to_freeze(); + + down_read(&umhelper_sem); + } + finish_wait(&usermodehelper_disabled_waitq, &wait); + return ret; +} +EXPORT_SYMBOL_GPL(usermodehelper_read_trylock); + +long usermodehelper_read_lock_wait(long timeout) +{ + DEFINE_WAIT(wait); + + if (timeout < 0) + return -EINVAL; + + down_read(&umhelper_sem); + for (;;) { + prepare_to_wait(&usermodehelper_disabled_waitq, &wait, + TASK_UNINTERRUPTIBLE); + if (!usermodehelper_disabled) + break; + + up_read(&umhelper_sem); + + timeout = schedule_timeout(timeout); + if (!timeout) + break; + + down_read(&umhelper_sem); + } + finish_wait(&usermodehelper_disabled_waitq, &wait); + return timeout; } -EXPORT_SYMBOL_GPL(read_lock_usermodehelper); +EXPORT_SYMBOL_GPL(usermodehelper_read_lock_wait); -void read_unlock_usermodehelper(void) +void usermodehelper_read_unlock(void) { up_read(&umhelper_sem); } -EXPORT_SYMBOL_GPL(read_unlock_usermodehelper); +EXPORT_SYMBOL_GPL(usermodehelper_read_unlock); /** - * usermodehelper_disable - prevent new helpers from being started + * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled. + * depth: New value to assign to usermodehelper_disabled. + * + * Change the value of usermodehelper_disabled (under umhelper_sem locked for + * writing) and wakeup tasks waiting for it to change. */ -int usermodehelper_disable(void) +void __usermodehelper_set_disable_depth(enum umh_disable_depth depth) +{ + down_write(&umhelper_sem); + usermodehelper_disabled = depth; + wake_up(&usermodehelper_disabled_waitq); + up_write(&umhelper_sem); +} + +/** + * __usermodehelper_disable - Prevent new helpers from being started. + * @depth: New value to assign to usermodehelper_disabled. + * + * Set usermodehelper_disabled to @depth and wait for running helpers to exit. + */ +int __usermodehelper_disable(enum umh_disable_depth depth) { long retval; + if (!depth) + return -EINVAL; + down_write(&umhelper_sem); - usermodehelper_disabled = 1; + usermodehelper_disabled = depth; up_write(&umhelper_sem); /* @@ -374,31 +452,10 @@ int usermodehelper_disable(void) if (retval) return 0; - down_write(&umhelper_sem); - usermodehelper_disabled = 0; - up_write(&umhelper_sem); + __usermodehelper_set_disable_depth(UMH_ENABLED); return -EAGAIN; } -/** - * usermodehelper_enable - allow new helpers to be started again - */ -void usermodehelper_enable(void) -{ - down_write(&umhelper_sem); - usermodehelper_disabled = 0; - up_write(&umhelper_sem); -} - -/** - * usermodehelper_is_disabled - check if new helpers are allowed to be started - */ -bool usermodehelper_is_disabled(void) -{ - return usermodehelper_disabled; -} -EXPORT_SYMBOL_GPL(usermodehelper_is_disabled); - static void helper_lock(void) { atomic_inc(&running_helpers); diff --git a/kernel/module.c b/kernel/module.c index 2c932760fd3..78ac6ec1e42 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -105,6 +105,7 @@ struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ /* Block module loading/unloading? */ int modules_disabled = 0; +core_param(nomodule, modules_disabled, bint, 0); /* Waiting for a module to finish initializing? */ static DECLARE_WAIT_QUEUE_HEAD(module_wq); @@ -903,6 +904,36 @@ static ssize_t show_refcnt(struct module_attribute *mattr, static struct module_attribute modinfo_refcnt = __ATTR(refcnt, 0444, show_refcnt, NULL); +void __module_get(struct module *module) +{ + if (module) { + preempt_disable(); + __this_cpu_inc(module->refptr->incs); + trace_module_get(module, _RET_IP_); + preempt_enable(); + } +} +EXPORT_SYMBOL(__module_get); + +bool try_module_get(struct module *module) +{ + bool ret = true; + + if (module) { + preempt_disable(); + + if (likely(module_is_live(module))) { + __this_cpu_inc(module->refptr->incs); + trace_module_get(module, _RET_IP_); + } else + ret = false; + + preempt_enable(); + } + return ret; +} +EXPORT_SYMBOL(try_module_get); + void module_put(struct module *module) { if (module) { @@ -2380,8 +2411,7 @@ static int copy_and_check(struct load_info *info, return -ENOEXEC; /* Suck in entire file: we'll want most of it. */ - /* vmalloc barfs on "unusual" numbers. Check here */ - if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) + if ((hdr = vmalloc(len)) == NULL) return -ENOMEM; if (copy_from_user(hdr, umod, len) != 0) { @@ -2922,7 +2952,8 @@ static struct module *load_module(void __user *umod, mutex_unlock(&module_mutex); /* Module is ready to execute: parsing args may do that. */ - err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL); + err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, + -32768, 32767, NULL); if (err < 0) goto unlink; diff --git a/kernel/padata.c b/kernel/padata.c index 6f10eb285ec..89fe3d1b9ef 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -1,6 +1,8 @@ /* * padata.c - generic interface to process data streams in parallel * + * See Documentation/padata.txt for an api documentation. + * * Copyright (C) 2008, 2009 secunet Security Networks AG * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com> * @@ -354,13 +356,13 @@ static int padata_setup_cpumasks(struct parallel_data *pd, if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL)) return -ENOMEM; - cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_active_mask); + cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask); if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) { free_cpumask_var(pd->cpumask.cbcpu); return -ENOMEM; } - cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_active_mask); + cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask); return 0; } @@ -564,7 +566,7 @@ EXPORT_SYMBOL(padata_unregister_cpumask_notifier); static bool padata_validate_cpumask(struct padata_instance *pinst, const struct cpumask *cpumask) { - if (!cpumask_intersects(cpumask, cpu_active_mask)) { + if (!cpumask_intersects(cpumask, cpu_online_mask)) { pinst->flags |= PADATA_INVALID; return false; } @@ -678,7 +680,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu) { struct parallel_data *pd; - if (cpumask_test_cpu(cpu, cpu_active_mask)) { + if (cpumask_test_cpu(cpu, cpu_online_mask)) { pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu, pinst->cpumask.cbcpu); if (!pd) @@ -746,6 +748,9 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) return -ENOMEM; padata_replace(pinst, pd); + + cpumask_clear_cpu(cpu, pd->cpumask.cbcpu); + cpumask_clear_cpu(cpu, pd->cpumask.pcpu); } return 0; diff --git a/kernel/params.c b/kernel/params.c index 47f5bf12434..f37d8263134 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -87,6 +87,8 @@ static int parse_one(char *param, char *val, const struct kernel_param *params, unsigned num_params, + s16 min_level, + s16 max_level, int (*handle_unknown)(char *param, char *val)) { unsigned int i; @@ -95,6 +97,9 @@ static int parse_one(char *param, /* Find parameter */ for (i = 0; i < num_params; i++) { if (parameq(param, params[i].name)) { + if (params[i].level < min_level + || params[i].level > max_level) + return 0; /* No one handled NULL, so do it here. */ if (!val && params[i].ops->set != param_set_bool && params[i].ops->set != param_set_bint) @@ -174,6 +179,8 @@ int parse_args(const char *name, char *args, const struct kernel_param *params, unsigned num, + s16 min_level, + s16 max_level, int (*unknown)(char *param, char *val)) { char *param, *val; @@ -189,7 +196,8 @@ int parse_args(const char *name, args = next_arg(args, ¶m, &val); irq_was_disabled = irqs_disabled(); - ret = parse_one(param, val, params, num, unknown); + ret = parse_one(param, val, params, num, + min_level, max_level, unknown); if (irq_was_disabled && !irqs_disabled()) { printk(KERN_WARNING "parse_args(): option '%s' enabled " "irq's!\n", param); @@ -297,35 +305,18 @@ EXPORT_SYMBOL(param_ops_charp); /* Actually could be a bool or an int, for historical reasons. */ int param_set_bool(const char *val, const struct kernel_param *kp) { - bool v; - int ret; - /* No equals means "set"... */ if (!val) val = "1"; /* One of =[yYnN01] */ - ret = strtobool(val, &v); - if (ret) - return ret; - - if (kp->flags & KPARAM_ISBOOL) - *(bool *)kp->arg = v; - else - *(int *)kp->arg = v; - return 0; + return strtobool(val, kp->arg); } EXPORT_SYMBOL(param_set_bool); int param_get_bool(char *buffer, const struct kernel_param *kp) { - bool val; - if (kp->flags & KPARAM_ISBOOL) - val = *(bool *)kp->arg; - else - val = *(int *)kp->arg; - /* Y and N chosen as being relatively non-coder friendly */ - return sprintf(buffer, "%c", val ? 'Y' : 'N'); + return sprintf(buffer, "%c", *(bool *)kp->arg ? 'Y' : 'N'); } EXPORT_SYMBOL(param_get_bool); @@ -343,7 +334,6 @@ int param_set_invbool(const char *val, const struct kernel_param *kp) struct kernel_param dummy; dummy.arg = &boolval; - dummy.flags = KPARAM_ISBOOL; ret = param_set_bool(val, &dummy); if (ret == 0) *(bool *)kp->arg = !boolval; @@ -372,7 +362,6 @@ int param_set_bint(const char *val, const struct kernel_param *kp) /* Match bool exactly, by re-using it. */ boolkp = *kp; boolkp.arg = &v; - boolkp.flags |= KPARAM_ISBOOL; ret = param_set_bool(val, &boolkp); if (ret == 0) @@ -393,7 +382,7 @@ static int param_array(const char *name, unsigned int min, unsigned int max, void *elem, int elemsize, int (*set)(const char *, const struct kernel_param *kp), - u16 flags, + s16 level, unsigned int *num) { int ret; @@ -403,7 +392,7 @@ static int param_array(const char *name, /* Get the name right for errors. */ kp.name = name; kp.arg = elem; - kp.flags = flags; + kp.level = level; *num = 0; /* We expect a comma-separated list of values. */ @@ -444,7 +433,7 @@ static int param_array_set(const char *val, const struct kernel_param *kp) unsigned int temp_num; return param_array(kp->name, val, 1, arr->max, arr->elem, - arr->elemsize, arr->ops->set, kp->flags, + arr->elemsize, arr->ops->set, kp->level, arr->num ?: &temp_num); } diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 17b232869a0..57bc1fd35b3 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -15,6 +15,7 @@ #include <linux/acct.h> #include <linux/slab.h> #include <linux/proc_fs.h> +#include <linux/reboot.h> #define BITS_PER_PAGE (PAGE_SIZE*8) @@ -183,6 +184,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) rc = sys_wait4(-1, NULL, __WALL, NULL); } while (rc != -ECHILD); + if (pid_ns->reboot) + current->signal->group_exit_code = pid_ns->reboot; + acct_exit_ns(pid_ns); return; } @@ -217,6 +221,35 @@ static struct ctl_table pid_ns_ctl_table[] = { static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } }; +int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) +{ + if (pid_ns == &init_pid_ns) + return 0; + + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART2: + case LINUX_REBOOT_CMD_RESTART: + pid_ns->reboot = SIGHUP; + break; + + case LINUX_REBOOT_CMD_POWER_OFF: + case LINUX_REBOOT_CMD_HALT: + pid_ns->reboot = SIGINT; + break; + default: + return -EINVAL; + } + + read_lock(&tasklist_lock); + force_sig(SIGKILL, pid_ns->child_reaper); + read_unlock(&tasklist_lock); + + do_exit(0); + + /* Not reached */ + return 0; +} + static __init int pid_namespaces_init(void) { pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 0a186cfde78..e09dfbfeece 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -16,7 +16,6 @@ #include <linux/string.h> #include <linux/device.h> #include <linux/async.h> -#include <linux/kmod.h> #include <linux/delay.h> #include <linux/fs.h> #include <linux/mount.h> @@ -611,14 +610,10 @@ int hibernate(void) if (error) goto Exit; - error = usermodehelper_disable(); - if (error) - goto Exit; - /* Allocate memory management structures */ error = create_basic_memory_bitmaps(); if (error) - goto Enable_umh; + goto Exit; printk(KERN_INFO "PM: Syncing filesystems ... "); sys_sync(); @@ -661,8 +656,6 @@ int hibernate(void) Free_bitmaps: free_basic_memory_bitmaps(); - Enable_umh: - usermodehelper_enable(); Exit: pm_notifier_call_chain(PM_POST_HIBERNATION); pm_restore_console(); @@ -777,15 +770,9 @@ static int software_resume(void) if (error) goto close_finish; - error = usermodehelper_disable(); - if (error) - goto close_finish; - error = create_basic_memory_bitmaps(); - if (error) { - usermodehelper_enable(); + if (error) goto close_finish; - } pr_debug("PM: Preparing processes for restore.\n"); error = freeze_processes(); @@ -806,7 +793,6 @@ static int software_resume(void) thaw_processes(); Done: free_basic_memory_bitmaps(); - usermodehelper_enable(); Finish: pm_notifier_call_chain(PM_POST_RESTORE); pm_restore_console(); diff --git a/kernel/power/process.c b/kernel/power/process.c index 0d2aeb22610..19db29f6755 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -16,6 +16,7 @@ #include <linux/freezer.h> #include <linux/delay.h> #include <linux/workqueue.h> +#include <linux/kmod.h> /* * Timeout for stopping processes @@ -122,6 +123,10 @@ int freeze_processes(void) { int error; + error = __usermodehelper_disable(UMH_FREEZING); + if (error) + return error; + if (!pm_freezing) atomic_inc(&system_freezing_cnt); @@ -130,6 +135,7 @@ int freeze_processes(void) error = try_to_freeze_tasks(true); if (!error) { printk("done."); + __usermodehelper_set_disable_depth(UMH_DISABLED); oom_killer_disable(); } printk("\n"); @@ -187,6 +193,8 @@ void thaw_processes(void) } while_each_thread(g, p); read_unlock(&tasklist_lock); + usermodehelper_enable(); + schedule(); printk("done.\n"); } diff --git a/kernel/power/qos.c b/kernel/power/qos.c index d6d6dbd1ecc..6a031e68402 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -230,6 +230,21 @@ int pm_qos_request_active(struct pm_qos_request *req) EXPORT_SYMBOL_GPL(pm_qos_request_active); /** + * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout + * @work: work struct for the delayed work (timeout) + * + * This cancels the timeout request by falling back to the default at timeout. + */ +static void pm_qos_work_fn(struct work_struct *work) +{ + struct pm_qos_request *req = container_of(to_delayed_work(work), + struct pm_qos_request, + work); + + pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE); +} + +/** * pm_qos_add_request - inserts new qos request into the list * @req: pointer to a preallocated handle * @pm_qos_class: identifies which list of qos request to use @@ -253,6 +268,7 @@ void pm_qos_add_request(struct pm_qos_request *req, return; } req->pm_qos_class = pm_qos_class; + INIT_DELAYED_WORK(&req->work, pm_qos_work_fn); pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints, &req->node, PM_QOS_ADD_REQ, value); } @@ -279,6 +295,9 @@ void pm_qos_update_request(struct pm_qos_request *req, return; } + if (delayed_work_pending(&req->work)) + cancel_delayed_work_sync(&req->work); + if (new_value != req->node.prio) pm_qos_update_target( pm_qos_array[req->pm_qos_class]->constraints, @@ -287,6 +306,34 @@ void pm_qos_update_request(struct pm_qos_request *req, EXPORT_SYMBOL_GPL(pm_qos_update_request); /** + * pm_qos_update_request_timeout - modifies an existing qos request temporarily. + * @req : handle to list element holding a pm_qos request to use + * @new_value: defines the temporal qos request + * @timeout_us: the effective duration of this qos request in usecs. + * + * After timeout_us, this qos request is cancelled automatically. + */ +void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value, + unsigned long timeout_us) +{ + if (!req) + return; + if (WARN(!pm_qos_request_active(req), + "%s called for unknown object.", __func__)) + return; + + if (delayed_work_pending(&req->work)) + cancel_delayed_work_sync(&req->work); + + if (new_value != req->node.prio) + pm_qos_update_target( + pm_qos_array[req->pm_qos_class]->constraints, + &req->node, PM_QOS_UPDATE_REQ, new_value); + + schedule_delayed_work(&req->work, usecs_to_jiffies(timeout_us)); +} + +/** * pm_qos_remove_request - modifies an existing qos request * @req: handle to request list element * @@ -305,6 +352,9 @@ void pm_qos_remove_request(struct pm_qos_request *req) return; } + if (delayed_work_pending(&req->work)) + cancel_delayed_work_sync(&req->work); + pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, &req->node, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 88e5c967370..396d262b8fd 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -12,7 +12,6 @@ #include <linux/delay.h> #include <linux/errno.h> #include <linux/init.h> -#include <linux/kmod.h> #include <linux/console.h> #include <linux/cpu.h> #include <linux/syscalls.h> @@ -102,17 +101,12 @@ static int suspend_prepare(void) if (error) goto Finish; - error = usermodehelper_disable(); - if (error) - goto Finish; - error = suspend_freeze_processes(); if (!error) return 0; suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); - usermodehelper_enable(); Finish: pm_notifier_call_chain(PM_POST_SUSPEND); pm_restore_console(); @@ -259,7 +253,6 @@ int suspend_devices_and_enter(suspend_state_t state) static void suspend_finish(void) { suspend_thaw_processes(); - usermodehelper_enable(); pm_notifier_call_chain(PM_POST_SUSPEND); pm_restore_console(); } diff --git a/kernel/power/user.c b/kernel/power/user.c index 33c4329205a..91b0fd021a9 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -12,7 +12,6 @@ #include <linux/suspend.h> #include <linux/syscalls.h> #include <linux/reboot.h> -#include <linux/kmod.h> #include <linux/string.h> #include <linux/device.h> #include <linux/miscdevice.h> @@ -222,14 +221,8 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, sys_sync(); printk("done.\n"); - error = usermodehelper_disable(); - if (error) - break; - error = freeze_processes(); - if (error) - usermodehelper_enable(); - else + if (!error) data->frozen = 1; break; @@ -238,7 +231,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; pm_restore_gfp_mask(); thaw_processes(); - usermodehelper_enable(); data->frozen = 0; break; diff --git a/kernel/rwsem.c b/kernel/rwsem.c index b152f74f02d..6850f53e02d 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c @@ -10,7 +10,6 @@ #include <linux/export.h> #include <linux/rwsem.h> -#include <asm/system.h> #include <linux/atomic.h> /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 503d6426126..4603b9d8f30 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -73,6 +73,7 @@ #include <linux/init_task.h> #include <linux/binfmts.h> +#include <asm/switch_to.h> #include <asm/tlb.h> #include <asm/irq_regs.h> #include <asm/mutex.h> @@ -1264,29 +1265,59 @@ EXPORT_SYMBOL_GPL(kick_process); */ static int select_fallback_rq(int cpu, struct task_struct *p) { - int dest_cpu; const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); + enum { cpuset, possible, fail } state = cpuset; + int dest_cpu; /* Look for allowed, online CPU in same node. */ - for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) + for_each_cpu(dest_cpu, nodemask) { + if (!cpu_online(dest_cpu)) + continue; + if (!cpu_active(dest_cpu)) + continue; if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) return dest_cpu; + } + + for (;;) { + /* Any allowed, online CPU? */ + for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) { + if (!cpu_online(dest_cpu)) + continue; + if (!cpu_active(dest_cpu)) + continue; + goto out; + } - /* Any allowed, online CPU? */ - dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask); - if (dest_cpu < nr_cpu_ids) - return dest_cpu; + switch (state) { + case cpuset: + /* No more Mr. Nice Guy. */ + cpuset_cpus_allowed_fallback(p); + state = possible; + break; - /* No more Mr. Nice Guy. */ - dest_cpu = cpuset_cpus_allowed_fallback(p); - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - printk_sched("process %d (%s) no longer affine to cpu%d\n", - task_pid_nr(p), p->comm, cpu); + case possible: + do_set_cpus_allowed(p, cpu_possible_mask); + state = fail; + break; + + case fail: + BUG(); + break; + } + } + +out: + if (state != cpuset) { + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk_sched("process %d (%s) no longer affine to cpu%d\n", + task_pid_nr(p), p->comm, cpu); + } } return dest_cpu; @@ -1933,6 +1964,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) local_irq_enable(); #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ finish_lock_switch(rq, prev); + finish_arch_post_lock_switch(); fire_sched_in_preempt_notifiers(current); if (mm) @@ -3070,8 +3102,6 @@ EXPORT_SYMBOL(sub_preempt_count); */ static noinline void __schedule_bug(struct task_struct *prev) { - struct pt_regs *regs = get_irq_regs(); - if (oops_in_progress) return; @@ -3082,11 +3112,7 @@ static noinline void __schedule_bug(struct task_struct *prev) print_modules(); if (irqs_disabled()) print_irqtrace_events(prev); - - if (regs) - show_regs(regs); - else - dump_stack(); + dump_stack(); } /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 94340c7544a..0d97ebdc58f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -416,8 +416,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) #endif /* CONFIG_FAIR_GROUP_SCHED */ -static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, - unsigned long delta_exec); +static __always_inline +void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec); /************************************************************** * Scheduling class tree data structure manipulation methods: @@ -1162,7 +1162,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) __clear_buddies_skip(se); } -static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); +static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); static void dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) @@ -1546,8 +1546,8 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, resched_task(rq_of(cfs_rq)->curr); } -static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, - unsigned long delta_exec) +static __always_inline +void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) { if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled) return; @@ -2073,11 +2073,11 @@ void unthrottle_offline_cfs_rqs(struct rq *rq) } #else /* CONFIG_CFS_BANDWIDTH */ -static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, - unsigned long delta_exec) {} +static __always_inline +void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {} static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} -static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} +static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) { diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index b60dad72017..44af55e6d5d 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1428,7 +1428,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) next_idx: if (idx >= MAX_RT_PRIO) continue; - if (next && next->prio < idx) + if (next && next->prio <= idx) continue; list_for_each_entry(rt_se, array->queue + idx, run_list) { struct task_struct *p; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 42b1f304b04..fb3acba4d52 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -681,6 +681,9 @@ static inline int task_running(struct rq *rq, struct task_struct *p) #ifndef finish_arch_switch # define finish_arch_switch(prev) do { } while (0) #endif +#ifndef finish_arch_post_lock_switch +# define finish_arch_post_lock_switch() do { } while (0) +#endif #ifndef __ARCH_WANT_UNLOCKED_CTXSW static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) diff --git a/kernel/signal.c b/kernel/signal.c index d523da02dd1..17afcaf582d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -36,6 +36,7 @@ #include <asm/uaccess.h> #include <asm/unistd.h> #include <asm/siginfo.h> +#include <asm/cacheflush.h> #include "audit.h" /* audit_signal_info() */ /* diff --git a/kernel/smp.c b/kernel/smp.c index db197d60489..2f8b10ecf75 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -701,3 +701,93 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait) return ret; } EXPORT_SYMBOL(on_each_cpu); + +/** + * on_each_cpu_mask(): Run a function on processors specified by + * cpumask, which may include the local processor. + * @mask: The set of cpus to run on (only runs on online subset). + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait (atomically) until function has completed + * on other CPUs. + * + * If @wait is true, then returns once @func has returned. + * + * You must not call this function with disabled interrupts or + * from a hardware interrupt handler or from a bottom half handler. + */ +void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, + void *info, bool wait) +{ + int cpu = get_cpu(); + + smp_call_function_many(mask, func, info, wait); + if (cpumask_test_cpu(cpu, mask)) { + local_irq_disable(); + func(info); + local_irq_enable(); + } + put_cpu(); +} +EXPORT_SYMBOL(on_each_cpu_mask); + +/* + * on_each_cpu_cond(): Call a function on each processor for which + * the supplied function cond_func returns true, optionally waiting + * for all the required CPUs to finish. This may include the local + * processor. + * @cond_func: A callback function that is passed a cpu id and + * the the info parameter. The function is called + * with preemption disabled. The function should + * return a blooean value indicating whether to IPI + * the specified CPU. + * @func: The function to run on all applicable CPUs. + * This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to both functions. + * @wait: If true, wait (atomically) until function has + * completed on other CPUs. + * @gfp_flags: GFP flags to use when allocating the cpumask + * used internally by the function. + * + * The function might sleep if the GFP flags indicates a non + * atomic allocation is allowed. + * + * Preemption is disabled to protect against CPUs going offline but not online. + * CPUs going online during the call will not be seen or sent an IPI. + * + * You must not call this function with disabled interrupts or + * from a hardware interrupt handler or from a bottom half handler. + */ +void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), + smp_call_func_t func, void *info, bool wait, + gfp_t gfp_flags) +{ + cpumask_var_t cpus; + int cpu, ret; + + might_sleep_if(gfp_flags & __GFP_WAIT); + + if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) { + preempt_disable(); + for_each_online_cpu(cpu) + if (cond_func(cpu, info)) + cpumask_set_cpu(cpu, cpus); + on_each_cpu_mask(cpus, func, info, wait); + preempt_enable(); + free_cpumask_var(cpus); + } else { + /* + * No free cpumask, bother. No matter, we'll + * just have to IPI them one by one. + */ + preempt_disable(); + for_each_online_cpu(cpu) + if (cond_func(cpu, info)) { + ret = smp_call_function_single(cpu, func, + info, wait); + WARN_ON_ONCE(!ret); + } + preempt_enable(); + } +} +EXPORT_SYMBOL(on_each_cpu_cond); diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 84c7d96918b..5cdd8065a3c 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -163,7 +163,7 @@ void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock) EXPORT_SYMBOL(_raw_spin_lock_bh); #endif -#ifndef CONFIG_INLINE_SPIN_UNLOCK +#ifdef CONFIG_UNINLINE_SPIN_UNLOCK void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock) { __raw_spin_unlock(lock); diff --git a/kernel/sys.c b/kernel/sys.c index 9eb7fcab8df..e7006eb6c1e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -444,6 +444,15 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, magic2 != LINUX_REBOOT_MAGIC2C)) return -EINVAL; + /* + * If pid namespaces are enabled and the current task is in a child + * pid_namespace, the command is handled by reboot_pid_ns() which will + * call do_exit(). + */ + ret = reboot_pid_ns(task_active_pid_ns(current), cmd); + if (ret) + return ret; + /* Instead of trying to make the power_off code look like * halt when pm_power_off is not set do it the easy way. */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d48ff4fd44c..4ab11879aeb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -23,6 +23,7 @@ #include <linux/swap.h> #include <linux/slab.h> #include <linux/sysctl.h> +#include <linux/bitmap.h> #include <linux/signal.h> #include <linux/printk.h> #include <linux/proc_fs.h> @@ -68,6 +69,9 @@ #include <asm/stacktrace.h> #include <asm/io.h> #endif +#ifdef CONFIG_SPARC +#include <asm/setup.h> +#endif #ifdef CONFIG_BSD_PROCESS_ACCT #include <linux/acct.h> #endif @@ -142,7 +146,6 @@ static const int cap_last_cap = CAP_LAST_CAP; #include <linux/inotify.h> #endif #ifdef CONFIG_SPARC -#include <asm/system.h> #endif #ifdef CONFIG_SPARC64 @@ -167,7 +170,7 @@ static int proc_taint(struct ctl_table *table, int write, #endif #ifdef CONFIG_PRINTK -static int proc_dmesg_restrict(struct ctl_table *table, int write, +static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif @@ -700,7 +703,7 @@ static struct ctl_table kern_table[] = { .data = &dmesg_restrict, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax_sysadmin, .extra1 = &zero, .extra2 = &one, }, @@ -709,7 +712,7 @@ static struct ctl_table kern_table[] = { .data = &kptr_restrict, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dmesg_restrict, + .proc_handler = proc_dointvec_minmax_sysadmin, .extra1 = &zero, .extra2 = &two, }, @@ -1940,7 +1943,7 @@ static int proc_taint(struct ctl_table *table, int write, } #ifdef CONFIG_PRINTK -static int proc_dmesg_restrict(struct ctl_table *table, int write, +static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { if (write && !capable(CAP_SYS_ADMIN)) @@ -2393,9 +2396,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, } } - while (val_a <= val_b) - set_bit(val_a++, tmp_bitmap); - + bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1); first = 0; proc_skip_char(&kbuf, &left, '\n'); } @@ -2438,8 +2439,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, if (*ppos) bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len); else - memcpy(bitmap, tmp_bitmap, - BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long)); + bitmap_copy(bitmap, tmp_bitmap, bitmap_len); } kfree(tmp_bitmap); *lenp -= left; diff --git a/kernel/time.c b/kernel/time.c index 73e416db0a1..ba744cf8069 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -163,7 +163,6 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz) return error; if (tz) { - /* SMP safe, global irq locking makes it work. */ sys_tz = *tz; update_vsyscall_tz(); if (firsttime) { @@ -173,12 +172,7 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz) } } if (tv) - { - /* SMP safe, again the code in arch/foo/time.c should - * globally block out interrupts when it runs. - */ return do_settimeofday(tv); - } return 0; } diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 8a46f5d6450..8a538c55fc7 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -96,6 +96,11 @@ static int alarmtimer_rtc_add_device(struct device *dev, return 0; } +static inline void alarmtimer_rtc_timer_init(void) +{ + rtc_timer_init(&rtctimer, NULL, NULL); +} + static struct class_interface alarmtimer_rtc_interface = { .add_dev = &alarmtimer_rtc_add_device, }; @@ -117,6 +122,7 @@ static inline struct rtc_device *alarmtimer_get_rtcdev(void) #define rtcdev (NULL) static inline int alarmtimer_rtc_interface_setup(void) { return 0; } static inline void alarmtimer_rtc_interface_remove(void) { } +static inline void alarmtimer_rtc_timer_init(void) { } #endif /** @@ -783,6 +789,8 @@ static int __init alarmtimer_init(void) .nsleep = alarm_timer_nsleep, }; + alarmtimer_rtc_timer_init(); + posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index a45ca167ab2..c9583382141 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -500,7 +500,7 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) { u64 ret; /* - * We won't try to correct for more then 11% adjustments (110,000 ppm), + * We won't try to correct for more than 11% adjustments (110,000 ppm), */ ret = (u64)cs->mult * 11; do_div(ret,100); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 6e039b144da..f03fd83b170 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -34,8 +34,6 @@ unsigned long tick_nsec; static u64 tick_length; static u64 tick_length_base; -static struct hrtimer leap_timer; - #define MAX_TICKADJ 500LL /* usecs */ #define MAX_TICKADJ_SCALED \ (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) @@ -381,70 +379,63 @@ u64 ntp_tick_length(void) /* - * Leap second processing. If in leap-insert state at the end of the - * day, the system clock is set back one second; if in leap-delete - * state, the system clock is set ahead one second. + * this routine handles the overflow of the microsecond field + * + * The tricky bits of code to handle the accurate clock support + * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. + * They were originally developed for SUN and DEC kernels. + * All the kudos should go to Dave for this stuff. + * + * Also handles leap second processing, and returns leap offset */ -static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) +int second_overflow(unsigned long secs) { - enum hrtimer_restart res = HRTIMER_NORESTART; - unsigned long flags; + s64 delta; int leap = 0; + unsigned long flags; spin_lock_irqsave(&ntp_lock, flags); + + /* + * Leap second processing. If in leap-insert state at the end of the + * day, the system clock is set back one second; if in leap-delete + * state, the system clock is set ahead one second. + */ switch (time_state) { case TIME_OK: + if (time_status & STA_INS) + time_state = TIME_INS; + else if (time_status & STA_DEL) + time_state = TIME_DEL; break; case TIME_INS: - leap = -1; - time_state = TIME_OOP; - printk(KERN_NOTICE - "Clock: inserting leap second 23:59:60 UTC\n"); - hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); - res = HRTIMER_RESTART; + if (secs % 86400 == 0) { + leap = -1; + time_state = TIME_OOP; + printk(KERN_NOTICE + "Clock: inserting leap second 23:59:60 UTC\n"); + } break; case TIME_DEL: - leap = 1; - time_tai--; - time_state = TIME_WAIT; - printk(KERN_NOTICE - "Clock: deleting leap second 23:59:59 UTC\n"); + if ((secs + 1) % 86400 == 0) { + leap = 1; + time_tai--; + time_state = TIME_WAIT; + printk(KERN_NOTICE + "Clock: deleting leap second 23:59:59 UTC\n"); + } break; case TIME_OOP: time_tai++; time_state = TIME_WAIT; - /* fall through */ + break; + case TIME_WAIT: if (!(time_status & (STA_INS | STA_DEL))) time_state = TIME_OK; break; } - spin_unlock_irqrestore(&ntp_lock, flags); - /* - * We have to call this outside of the ntp_lock to keep - * the proper locking hierarchy - */ - if (leap) - timekeeping_leap_insert(leap); - - return res; -} - -/* - * this routine handles the overflow of the microsecond field - * - * The tricky bits of code to handle the accurate clock support - * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. - * They were originally developed for SUN and DEC kernels. - * All the kudos should go to Dave for this stuff. - */ -void second_overflow(void) -{ - s64 delta; - unsigned long flags; - - spin_lock_irqsave(&ntp_lock, flags); /* Bump the maxerror field */ time_maxerror += MAXFREQ / NSEC_PER_USEC; @@ -481,15 +472,17 @@ void second_overflow(void) tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) << NTP_SCALE_SHIFT; time_adjust = 0; + + + out: spin_unlock_irqrestore(&ntp_lock, flags); + + return leap; } #ifdef CONFIG_GENERIC_CMOS_UPDATE -/* Disable the cmos update - used by virtualization and embedded */ -int no_sync_cmos_clock __read_mostly; - static void sync_cmos_clock(struct work_struct *work); static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); @@ -536,35 +529,13 @@ static void sync_cmos_clock(struct work_struct *work) static void notify_cmos_timer(void) { - if (!no_sync_cmos_clock) - schedule_delayed_work(&sync_cmos_work, 0); + schedule_delayed_work(&sync_cmos_work, 0); } #else static inline void notify_cmos_timer(void) { } #endif -/* - * Start the leap seconds timer: - */ -static inline void ntp_start_leap_timer(struct timespec *ts) -{ - long now = ts->tv_sec; - - if (time_status & STA_INS) { - time_state = TIME_INS; - now += 86400 - now % 86400; - hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); - - return; - } - - if (time_status & STA_DEL) { - time_state = TIME_DEL; - now += 86400 - (now + 1) % 86400; - hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); - } -} /* * Propagate a new txc->status value into the NTP state: @@ -589,22 +560,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) time_status &= STA_RONLY; time_status |= txc->status & ~STA_RONLY; - switch (time_state) { - case TIME_OK: - ntp_start_leap_timer(ts); - break; - case TIME_INS: - case TIME_DEL: - time_state = TIME_OK; - ntp_start_leap_timer(ts); - case TIME_WAIT: - if (!(time_status & (STA_INS | STA_DEL))) - time_state = TIME_OK; - break; - case TIME_OOP: - hrtimer_restart(&leap_timer); - break; - } } /* * Called with the xtime lock held, so we can access and modify @@ -686,9 +641,6 @@ int do_adjtimex(struct timex *txc) (txc->tick < 900000/USER_HZ || txc->tick > 1100000/USER_HZ)) return -EINVAL; - - if (txc->modes & ADJ_STATUS && time_state != TIME_OK) - hrtimer_cancel(&leap_timer); } if (txc->modes & ADJ_SETOFFSET) { @@ -1010,6 +962,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup); void __init ntp_init(void) { ntp_clear(); - hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - leap_timer.function = ntp_leap_second; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 15be32e19c6..d66b21308f7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp) } -void timekeeping_leap_insert(int leapsecond) -{ - unsigned long flags; - - write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeper.xtime.tv_sec += leapsecond; - timekeeper.wall_to_monotonic.tv_sec -= leapsecond; - timekeeping_update(false); - write_sequnlock_irqrestore(&timekeeper.lock, flags); - -} - /** * timekeeping_forward_now - update clock to the current time * @@ -448,9 +436,12 @@ EXPORT_SYMBOL(timekeeping_inject_offset); static int change_clocksource(void *data) { struct clocksource *new, *old; + unsigned long flags; new = (struct clocksource *) data; + write_seqlock_irqsave(&timekeeper.lock, flags); + timekeeping_forward_now(); if (!new->enable || new->enable(new) == 0) { old = timekeeper.clock; @@ -458,6 +449,10 @@ static int change_clocksource(void *data) if (old->disable) old->disable(old); } + timekeeping_update(true); + + write_sequnlock_irqrestore(&timekeeper.lock, flags); + return 0; } @@ -827,7 +822,7 @@ static void timekeeping_adjust(s64 offset) int adj; /* - * The point of this is to check if the error is greater then half + * The point of this is to check if the error is greater than half * an interval. * * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs. @@ -835,7 +830,7 @@ static void timekeeping_adjust(s64 offset) * Note we subtract one in the shift, so that error is really error*2. * This "saves" dividing(shifting) interval twice, but keeps the * (error > interval) comparison as still measuring if error is - * larger then half an interval. + * larger than half an interval. * * Note: It does not "save" on aggravation when reading the code. */ @@ -843,7 +838,7 @@ static void timekeeping_adjust(s64 offset) if (error > interval) { /* * We now divide error by 4(via shift), which checks if - * the error is greater then twice the interval. + * the error is greater than twice the interval. * If it is greater, we need a bigadjust, if its smaller, * we can adjust by 1. */ @@ -874,13 +869,15 @@ static void timekeeping_adjust(s64 offset) } else /* No adjustment needed */ return; - WARN_ONCE(timekeeper.clock->maxadj && - (timekeeper.mult + adj > timekeeper.clock->mult + - timekeeper.clock->maxadj), - "Adjusting %s more then 11%% (%ld vs %ld)\n", + if (unlikely(timekeeper.clock->maxadj && + (timekeeper.mult + adj > + timekeeper.clock->mult + timekeeper.clock->maxadj))) { + printk_once(KERN_WARNING + "Adjusting %s more than 11%% (%ld vs %ld)\n", timekeeper.clock->name, (long)timekeeper.mult + adj, (long)timekeeper.clock->mult + timekeeper.clock->maxadj); + } /* * So the following can be confusing. * @@ -952,7 +949,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; u64 raw_nsecs; - /* If the offset is smaller then a shifted interval, do nothing */ + /* If the offset is smaller than a shifted interval, do nothing */ if (offset < timekeeper.cycle_interval<<shift) return offset; @@ -962,9 +959,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; while (timekeeper.xtime_nsec >= nsecps) { + int leap; timekeeper.xtime_nsec -= nsecps; timekeeper.xtime.tv_sec++; - second_overflow(); + leap = second_overflow(timekeeper.xtime.tv_sec); + timekeeper.xtime.tv_sec += leap; } /* Accumulate raw time */ @@ -1018,13 +1017,13 @@ static void update_wall_time(void) * With NO_HZ we may have to accumulate many cycle_intervals * (think "ticks") worth of time at once. To do this efficiently, * we calculate the largest doubling multiple of cycle_intervals - * that is smaller then the offset. We then accumulate that + * that is smaller than the offset. We then accumulate that * chunk in one go, and then try to consume the next smaller * doubled multiple. */ shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); shift = max(0, shift); - /* Bound shift to one less then what overflows tick_length */ + /* Bound shift to one less than what overflows tick_length */ maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; shift = min(shift, maxshift); while (offset >= timekeeper.cycle_interval) { @@ -1072,12 +1071,14 @@ static void update_wall_time(void) /* * Finally, make sure that after the rounding - * xtime.tv_nsec isn't larger then NSEC_PER_SEC + * xtime.tv_nsec isn't larger than NSEC_PER_SEC */ if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { + int leap; timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; timekeeper.xtime.tv_sec++; - second_overflow(); + leap = second_overflow(timekeeper.xtime.tv_sec); + timekeeper.xtime.tv_sec += leap; } timekeeping_update(false); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index cd3134510f3..a1d2849f247 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -141,7 +141,7 @@ if FTRACE config FUNCTION_TRACER bool "Kernel Function Tracer" depends on HAVE_FUNCTION_TRACER - select FRAME_POINTER if !ARM_UNWIND && !S390 && !MICROBLAZE + select FRAME_POINTER if !ARM_UNWIND && !PPC && !S390 && !MICROBLAZE select KALLSYMS select GENERIC_TRACER select CONTEXT_SWITCH_TRACER diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index cdea7b56b0c..c0bd0308741 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -311,13 +311,6 @@ int blk_trace_remove(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_trace_remove); -static int blk_dropped_open(struct inode *inode, struct file *filp) -{ - filp->private_data = inode->i_private; - - return 0; -} - static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { @@ -331,18 +324,11 @@ static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, static const struct file_operations blk_dropped_fops = { .owner = THIS_MODULE, - .open = blk_dropped_open, + .open = simple_open, .read = blk_dropped_read, .llseek = default_llseek, }; -static int blk_msg_open(struct inode *inode, struct file *filp) -{ - filp->private_data = inode->i_private; - - return 0; -} - static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, size_t count, loff_t *ppos) { @@ -371,7 +357,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, static const struct file_operations blk_msg_fops = { .owner = THIS_MODULE, - .open = blk_msg_open, + .open = simple_open, .write = blk_msg_write, .llseek = noop_llseek, }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 867bd1dd2dd..0fa92f677c9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -249,7 +249,8 @@ static void update_ftrace_function(void) #else __ftrace_trace_function = func; #endif - ftrace_trace_function = ftrace_test_stop_func; + ftrace_trace_function = + (func == ftrace_stub) ? func : ftrace_test_stop_func; #endif } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f5b7b5c1195..cf8d11e91ef 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -154,33 +154,10 @@ enum { static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; -#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) - -/** - * tracing_on - enable all tracing buffers - * - * This function enables all tracing buffers that may have been - * disabled with tracing_off. - */ -void tracing_on(void) -{ - set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags); -} -EXPORT_SYMBOL_GPL(tracing_on); +/* Used for individual buffers (after the counter) */ +#define RB_BUFFER_OFF (1 << 20) -/** - * tracing_off - turn off all tracing buffers - * - * This function stops all tracing buffers from recording data. - * It does not disable any overhead the tracers themselves may - * be causing. This function simply causes all recording to - * the ring buffers to fail. - */ -void tracing_off(void) -{ - clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags); -} -EXPORT_SYMBOL_GPL(tracing_off); +#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) /** * tracing_off_permanent - permanently disable ring buffers @@ -193,15 +170,6 @@ void tracing_off_permanent(void) set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); } -/** - * tracing_is_on - show state of ring buffers enabled - */ -int tracing_is_on(void) -{ - return ring_buffer_flags == RB_BUFFERS_ON; -} -EXPORT_SYMBOL_GPL(tracing_is_on); - #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) #define RB_ALIGNMENT 4U #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) @@ -2619,6 +2587,63 @@ void ring_buffer_record_enable(struct ring_buffer *buffer) EXPORT_SYMBOL_GPL(ring_buffer_record_enable); /** + * ring_buffer_record_off - stop all writes into the buffer + * @buffer: The ring buffer to stop writes to. + * + * This prevents all writes to the buffer. Any attempt to write + * to the buffer after this will fail and return NULL. + * + * This is different than ring_buffer_record_disable() as + * it works like an on/off switch, where as the disable() verison + * must be paired with a enable(). + */ +void ring_buffer_record_off(struct ring_buffer *buffer) +{ + unsigned int rd; + unsigned int new_rd; + + do { + rd = atomic_read(&buffer->record_disabled); + new_rd = rd | RB_BUFFER_OFF; + } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); +} +EXPORT_SYMBOL_GPL(ring_buffer_record_off); + +/** + * ring_buffer_record_on - restart writes into the buffer + * @buffer: The ring buffer to start writes to. + * + * This enables all writes to the buffer that was disabled by + * ring_buffer_record_off(). + * + * This is different than ring_buffer_record_enable() as + * it works like an on/off switch, where as the enable() verison + * must be paired with a disable(). + */ +void ring_buffer_record_on(struct ring_buffer *buffer) +{ + unsigned int rd; + unsigned int new_rd; + + do { + rd = atomic_read(&buffer->record_disabled); + new_rd = rd & ~RB_BUFFER_OFF; + } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); +} +EXPORT_SYMBOL_GPL(ring_buffer_record_on); + +/** + * ring_buffer_record_is_on - return true if the ring buffer can write + * @buffer: The ring buffer to see if write is enabled + * + * Returns true if the ring buffer is in a state that it accepts writes. + */ +int ring_buffer_record_is_on(struct ring_buffer *buffer) +{ + return !atomic_read(&buffer->record_disabled); +} + +/** * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer * @buffer: The ring buffer to stop writes to. * @cpu: The CPU buffer to stop @@ -4039,68 +4064,6 @@ int ring_buffer_read_page(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(ring_buffer_read_page); -#ifdef CONFIG_TRACING -static ssize_t -rb_simple_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - unsigned long *p = filp->private_data; - char buf[64]; - int r; - - if (test_bit(RB_BUFFERS_DISABLED_BIT, p)) - r = sprintf(buf, "permanently disabled\n"); - else - r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p)); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - -static ssize_t -rb_simple_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - unsigned long *p = filp->private_data; - unsigned long val; - int ret; - - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); - if (ret) - return ret; - - if (val) - set_bit(RB_BUFFERS_ON_BIT, p); - else - clear_bit(RB_BUFFERS_ON_BIT, p); - - (*ppos)++; - - return cnt; -} - -static const struct file_operations rb_simple_fops = { - .open = tracing_open_generic, - .read = rb_simple_read, - .write = rb_simple_write, - .llseek = default_llseek, -}; - - -static __init int rb_init_debugfs(void) -{ - struct dentry *d_tracer; - - d_tracer = tracing_init_dentry(); - - trace_create_file("tracing_on", 0644, d_tracer, - &ring_buffer_flags, &rb_simple_fops); - - return 0; -} - -fs_initcall(rb_init_debugfs); -#endif - #ifdef CONFIG_HOTPLUG_CPU static int rb_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 10d5503f0d0..ed7b5d1e12f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -36,6 +36,7 @@ #include <linux/ctype.h> #include <linux/init.h> #include <linux/poll.h> +#include <linux/nmi.h> #include <linux/fs.h> #include "trace.h" @@ -352,6 +353,59 @@ static void wakeup_work_handler(struct work_struct *work) static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); /** + * tracing_on - enable tracing buffers + * + * This function enables tracing buffers that may have been + * disabled with tracing_off. + */ +void tracing_on(void) +{ + if (global_trace.buffer) + ring_buffer_record_on(global_trace.buffer); + /* + * This flag is only looked at when buffers haven't been + * allocated yet. We don't really care about the race + * between setting this flag and actually turning + * on the buffer. + */ + global_trace.buffer_disabled = 0; +} +EXPORT_SYMBOL_GPL(tracing_on); + +/** + * tracing_off - turn off tracing buffers + * + * This function stops the tracing buffers from recording data. + * It does not disable any overhead the tracers themselves may + * be causing. This function simply causes all recording to + * the ring buffers to fail. + */ +void tracing_off(void) +{ + if (global_trace.buffer) + ring_buffer_record_on(global_trace.buffer); + /* + * This flag is only looked at when buffers haven't been + * allocated yet. We don't really care about the race + * between setting this flag and actually turning + * on the buffer. + */ + global_trace.buffer_disabled = 1; +} +EXPORT_SYMBOL_GPL(tracing_off); + +/** + * tracing_is_on - show state of ring buffers enabled + */ +int tracing_is_on(void) +{ + if (global_trace.buffer) + return ring_buffer_record_is_on(global_trace.buffer); + return !global_trace.buffer_disabled; +} +EXPORT_SYMBOL_GPL(tracing_is_on); + +/** * trace_wake_up - wake up tasks waiting for trace input * * Schedules a delayed work to wake up any task that is blocked on the @@ -1644,6 +1698,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, int cpu_file = iter->cpu_file; u64 next_ts = 0, ts; int next_cpu = -1; + int next_size = 0; int cpu; /* @@ -1675,9 +1730,12 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, next_cpu = cpu; next_ts = ts; next_lost = lost_events; + next_size = iter->ent_size; } } + iter->ent_size = next_size; + if (ent_cpu) *ent_cpu = next_cpu; @@ -4567,6 +4625,55 @@ static __init void create_trace_options_dir(void) create_trace_option_core_file(trace_options[i], i); } +static ssize_t +rb_simple_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct ring_buffer *buffer = filp->private_data; + char buf[64]; + int r; + + if (buffer) + r = ring_buffer_record_is_on(buffer); + else + r = 0; + + r = sprintf(buf, "%d\n", r); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +rb_simple_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct ring_buffer *buffer = filp->private_data; + unsigned long val; + int ret; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) + return ret; + + if (buffer) { + if (val) + ring_buffer_record_on(buffer); + else + ring_buffer_record_off(buffer); + } + + (*ppos)++; + + return cnt; +} + +static const struct file_operations rb_simple_fops = { + .open = tracing_open_generic, + .read = rb_simple_read, + .write = rb_simple_write, + .llseek = default_llseek, +}; + static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; @@ -4626,6 +4733,9 @@ static __init int tracer_init_debugfs(void) trace_create_file("trace_clock", 0644, d_tracer, NULL, &trace_clock_fops); + trace_create_file("tracing_on", 0644, d_tracer, + global_trace.buffer, &rb_simple_fops); + #ifdef CONFIG_DYNAMIC_FTRACE trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, &tracing_dyn_info_fops); @@ -4798,6 +4908,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) if (ret != TRACE_TYPE_NO_CONSUME) trace_consume(&iter); } + touch_nmi_watchdog(); trace_printk_seq(&iter.seq); } @@ -4863,6 +4974,8 @@ __init static int tracer_alloc_buffers(void) goto out_free_cpumask; } global_trace.entries = ring_buffer_size(global_trace.buffer); + if (global_trace.buffer_disabled) + tracing_off(); #ifdef CONFIG_TRACER_MAX_TRACE diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54faec790bc..95059f091a2 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -154,6 +154,7 @@ struct trace_array { struct ring_buffer *buffer; unsigned long entries; int cpu; + int buffer_disabled; cycle_t time_start; struct task_struct *waiter; struct trace_array_cpu *data[NR_CPUS]; @@ -835,13 +836,11 @@ extern const char *__stop___trace_bprintk_fmt[]; filter) #include "trace_entries.h" -#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_FUNCTION_TRACER int perf_ftrace_event_register(struct ftrace_event_call *call, enum trace_reg type, void *data); #else #define perf_ftrace_event_register NULL #endif /* CONFIG_FUNCTION_TRACER */ -#endif /* CONFIG_PERF_EVENTS */ #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index d91eb0541b3..4108e1250ca 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -166,6 +166,12 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry, #define FTRACE_STACK_ENTRIES 8 +#ifndef CONFIG_64BIT +# define IP_FMT "%08lx" +#else +# define IP_FMT "%016lx" +#endif + FTRACE_ENTRY(kernel_stack, stack_entry, TRACE_STACK, @@ -175,8 +181,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry, __dynamic_array(unsigned long, caller ) ), - F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" - "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", + F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" + "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" + "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n", __entry->caller[0], __entry->caller[1], __entry->caller[2], __entry->caller[3], __entry->caller[4], __entry->caller[5], __entry->caller[6], __entry->caller[7]), @@ -193,8 +200,9 @@ FTRACE_ENTRY(user_stack, userstack_entry, __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) ), - F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" - "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", + F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" + "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" + "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n", __entry->caller[0], __entry->caller[1], __entry->caller[2], __entry->caller[3], __entry->caller[4], __entry->caller[5], __entry->caller[6], __entry->caller[7]), diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 7b46c9bd22a..3dd15e8bc85 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -162,7 +162,7 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ #define __dynamic_array(type, item) #undef F_printk -#define F_printk(fmt, args...) #fmt ", " __stringify(args) +#define F_printk(fmt, args...) __stringify(fmt) ", " __stringify(args) #undef FTRACE_ENTRY_REG #define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ |