Diffstat (limited to 'kernel')
46 files changed, 2382 insertions, 830 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 85cbfb31e73..e9cf19155b4 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -21,7 +21,6 @@ CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg -CFLAGS_REMOVE_perf_event.o = -pg CFLAGS_REMOVE_irq_work.o = -pg endif @@ -103,8 +102,9 @@ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_IRQ_WORK) += irq_work.o -obj-$(CONFIG_PERF_EVENTS) += perf_event.o -obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o + +obj-$(CONFIG_PERF_EVENTS) += events/ + obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o obj-$(CONFIG_PADATA) += padata.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o diff --git a/kernel/capability.c b/kernel/capability.c index bf0c734d0c1..32a80e08ff4 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -399,3 +399,15 @@ bool task_ns_capable(struct task_struct *t, int cap) return ns_capable(task_cred_xxx(t, user)->user_ns, cap); } EXPORT_SYMBOL(task_ns_capable); + +/** + * nsown_capable - Check superior capability to one's own user_ns + * @cap: The capability in question + * + * Return true if the current task has the given superior capability + * targeted at its own user namespace. + */ +bool nsown_capable(int cap) +{ + return ns_capable(current_user_ns(), cap); +} diff --git a/kernel/cred.c b/kernel/cred.c index 5557b55048d..8093c16b84b 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -54,6 +54,7 @@ struct cred init_cred = { .cap_effective = CAP_INIT_EFF_SET, .cap_bset = CAP_INIT_BSET, .user = INIT_USER, + .user_ns = &init_user_ns, .group_info = &init_groups, #ifdef CONFIG_KEYS .tgcred = &init_tgcred, @@ -410,6 +411,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) goto error_put; } + /* cache user_ns in cred. Doesn't need a refcount because it will + * stay pinned by cred->user + */ + new->user_ns = new->user->user_ns; + #ifdef CONFIG_KEYS /* new threads get their own thread keyrings if their parent already * had one */ @@ -741,12 +747,6 @@ int set_create_files_as(struct cred *new, struct inode *inode) } EXPORT_SYMBOL(set_create_files_as); -struct user_namespace *current_user_ns(void) -{ - return _current_user_ns(); -} -EXPORT_SYMBOL(current_user_ns); - #ifdef CONFIG_DEBUG_CREDENTIALS bool creds_are_invalid(const struct cred *cred) diff --git a/kernel/events/Makefile b/kernel/events/Makefile new file mode 100644 index 00000000000..1ce23d3d839 --- /dev/null +++ b/kernel/events/Makefile @@ -0,0 +1,6 @@ +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_core.o = -pg +endif + +obj-y := core.o +obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o diff --git a/kernel/perf_event.c b/kernel/events/core.c index 8e81a9860a0..0fc34a370ba 100644 --- a/kernel/perf_event.c +++ b/kernel/events/core.c @@ -2,8 +2,8 @@ * Performance events core code: * * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> * Copyright © 2009 Paul Mackerras, IBM Corp. 
<paulus@au1.ibm.com> * * For licensing details see kernel-base/COPYING @@ -39,10 +39,10 @@ #include <asm/irq_regs.h> struct remote_function_call { - struct task_struct *p; - int (*func)(void *info); - void *info; - int ret; + struct task_struct *p; + int (*func)(void *info); + void *info; + int ret; }; static void remote_function(void *data) @@ -76,10 +76,10 @@ static int task_function_call(struct task_struct *p, int (*func) (void *info), void *info) { struct remote_function_call data = { - .p = p, - .func = func, - .info = info, - .ret = -ESRCH, /* No such (running) process */ + .p = p, + .func = func, + .info = info, + .ret = -ESRCH, /* No such (running) process */ }; if (task_curr(p)) @@ -100,10 +100,10 @@ task_function_call(struct task_struct *p, int (*func) (void *info), void *info) static int cpu_function_call(int cpu, int (*func) (void *info), void *info) { struct remote_function_call data = { - .p = NULL, - .func = func, - .info = info, - .ret = -ENXIO, /* No such CPU */ + .p = NULL, + .func = func, + .info = info, + .ret = -ENXIO, /* No such CPU */ }; smp_call_function_single(cpu, remote_function, &data, 1); @@ -125,7 +125,7 @@ enum event_type_t { * perf_sched_events : >0 events exist * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu */ -atomic_t perf_sched_events __read_mostly; +struct jump_label_key perf_sched_events __read_mostly; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static atomic_t nr_mmap_events __read_mostly; @@ -5429,7 +5429,7 @@ fail: return err; } -atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; +struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; static void sw_perf_event_destroy(struct perf_event *event) { @@ -7445,11 +7445,11 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, } struct cgroup_subsys perf_subsys = { - .name = "perf_event", - .subsys_id = perf_subsys_id, - .create = perf_cgroup_create, - .destroy = perf_cgroup_destroy, - .exit = perf_cgroup_exit, - .attach = perf_cgroup_attach, + .name = "perf_event", + .subsys_id = perf_subsys_id, + .create = perf_cgroup_create, + .destroy = perf_cgroup_destroy, + .exit = perf_cgroup_exit, + .attach = perf_cgroup_attach, }; #endif /* CONFIG_CGROUP_PERF */ diff --git a/kernel/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 086adf25a55..086adf25a55 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c diff --git a/kernel/extable.c b/kernel/extable.c index 7f8f263f852..c2d625fcda7 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -72,6 +72,14 @@ int core_kernel_text(unsigned long addr) return 0; } +int core_kernel_data(unsigned long addr) +{ + if (addr >= (unsigned long)_sdata && + addr < (unsigned long)_edata) + return 1; + return 0; +} + int __kernel_text_address(unsigned long addr) { if (core_kernel_text(addr)) diff --git a/kernel/freezer.c b/kernel/freezer.c index 66ecd2ead21..7b01de98bb6 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -17,7 +17,7 @@ static inline void frozen_process(void) { if (!unlikely(current->flags & PF_NOFREEZE)) { current->flags |= PF_FROZEN; - wmb(); + smp_wmb(); } clear_freeze_flag(current); } @@ -93,7 +93,7 @@ bool freeze_task(struct task_struct *p, bool sig_only) * the task as frozen and next clears its TIF_FREEZE. 
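The freezer hunk here swaps the mandatory wmb()/rmb() for SMP-only barriers; the reader side of the real code continues in the chunk just below. A minimal sketch of the pairing it relies on, using illustrative flag names rather than the kernel's PF_FROZEN/TIF_FREEZE, with the barrier macros coming from the arch headers the file already pulls in:

	/* Writer, cf. frozen_process(): mark frozen, then withdraw the request. */
	static bool task_frozen;
	static bool freeze_requested = true;

	static void writer(void)
	{
		task_frozen = true;
		smp_wmb();			/* frozen state visible before ... */
		freeze_requested = false;	/* ... the request is cleared */
	}

	/* Reader, cf. freeze_task(): a cleared request implies frozen is visible. */
	static bool reader(void)
	{
		if (!freeze_requested) {
			smp_rmb();		/* pairs with the smp_wmb() above */
			if (task_frozen)
				return false;	/* already frozen, nothing to do */
		}
		return true;
	}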
*/ if (!freezing(p)) { - rmb(); + smp_rmb(); if (frozen(p)) return false; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 53ead174da2..ea640120ab8 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -33,7 +33,7 @@ unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; /* * Zero means infinite timeout - no checking done: */ -unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; +unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT; unsigned long __read_mostly sysctl_hung_task_warnings = 10; diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index c574f9a12c4..d1d051b38e0 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -48,6 +48,10 @@ config IRQ_PREFLOW_FASTEOI config IRQ_EDGE_EOI_HANDLER bool +# Generic configurable interrupt chip implementation +config GENERIC_IRQ_CHIP + bool + # Support forced irq threading config IRQ_FORCED_THREADING bool diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 54329cd7b3e..73290056cfb 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -1,5 +1,6 @@ obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o +obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 4af1e2b244c..d5a3009da71 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -310,6 +310,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) out_unlock: raw_spin_unlock(&desc->lock); } +EXPORT_SYMBOL_GPL(handle_simple_irq); /** * handle_level_irq - Level type irq handler @@ -573,6 +574,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, if (handle != handle_bad_irq && is_chained) { irq_settings_set_noprobe(desc); irq_settings_set_norequest(desc); + irq_settings_set_nothread(desc); irq_startup(desc); } out: @@ -612,6 +614,7 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) irq_put_desc_unlock(desc, flags); } +EXPORT_SYMBOL_GPL(irq_modify_status); /** * irq_cpu_online - Invoke all irq_cpu_online functions. 
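Together with the IRQ_NOTHREAD flag added to settings.h and debug.h further down, the newly exported irq_modify_status() lets platform code exempt individual interrupt lines from forced threading, mirroring what __irq_set_handler() now does for chained handlers. A hedged sketch with a hypothetical helper name:

	#include <linux/irq.h>

	/* Keep this line as a hard (non-threaded) interrupt even when booting
	 * with threadirqs; __setup_irq() will then skip
	 * irq_setup_forced_threading() for it. */
	static void example_mark_irq_nothread(unsigned int irq)
	{
		irq_modify_status(irq, 0, IRQ_NOTHREAD);
	}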
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h index 306cba37e9a..97a8bfadc88 100644 --- a/kernel/irq/debug.h +++ b/kernel/irq/debug.h @@ -27,6 +27,7 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) P(IRQ_PER_CPU); P(IRQ_NOPROBE); P(IRQ_NOREQUEST); + P(IRQ_NOTHREAD); P(IRQ_NOAUTOEN); PS(IRQS_AUTODETECT); diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c new file mode 100644 index 00000000000..31a9db71190 --- /dev/null +++ b/kernel/irq/generic-chip.c @@ -0,0 +1,354 @@ +/* + * Library implementing the most common irq chip callback functions + * + * Copyright (C) 2011, Thomas Gleixner + */ +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/syscore_ops.h> + +#include "internals.h" + +static LIST_HEAD(gc_list); +static DEFINE_RAW_SPINLOCK(gc_lock); + +static inline struct irq_chip_regs *cur_regs(struct irq_data *d) +{ + return &container_of(d->chip, struct irq_chip_type, chip)->regs; +} + +/** + * irq_gc_noop - NOOP function + * @d: irq_data + */ +void irq_gc_noop(struct irq_data *d) +{ +} + +/** + * irq_gc_mask_disable_reg - Mask chip via disable register + * @d: irq_data + * + * Chip has separate enable/disable registers instead of a single mask + * register. + */ +void irq_gc_mask_disable_reg(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->disable); + gc->mask_cache &= ~mask; + irq_gc_unlock(gc); +} + +/** + * irq_gc_mask_set_mask_bit - Mask chip via setting bit in mask register + * @d: irq_data + * + * Chip has a single mask register. Values of this register are cached + * and protected by gc->lock + */ +void irq_gc_mask_set_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + gc->mask_cache |= mask; + irq_reg_writel(gc->mask_cache, gc->reg_base + cur_regs(d)->mask); + irq_gc_unlock(gc); +} + +/** + * irq_gc_mask_set_mask_bit - Mask chip via clearing bit in mask register + * @d: irq_data + * + * Chip has a single mask register. Values of this register are cached + * and protected by gc->lock + */ +void irq_gc_mask_clr_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + gc->mask_cache &= ~mask; + irq_reg_writel(gc->mask_cache, gc->reg_base + cur_regs(d)->mask); + irq_gc_unlock(gc); +} + +/** + * irq_gc_unmask_enable_reg - Unmask chip via enable register + * @d: irq_data + * + * Chip has separate enable/disable registers instead of a single mask + * register. 
+ */ +void irq_gc_unmask_enable_reg(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->enable); + gc->mask_cache |= mask; + irq_gc_unlock(gc); +} + +/** + * irq_gc_ack - Ack pending interrupt + * @d: irq_data + */ +void irq_gc_ack(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->ack); + irq_gc_unlock(gc); +} + +/** + * irq_gc_mask_disable_reg_and_ack- Mask and ack pending interrupt + * @d: irq_data + */ +void irq_gc_mask_disable_reg_and_ack(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->mask); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->ack); + irq_gc_unlock(gc); +} + +/** + * irq_gc_eoi - EOI interrupt + * @d: irq_data + */ +void irq_gc_eoi(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->eoi); + irq_gc_unlock(gc); +} + +/** + * irq_gc_set_wake - Set/clr wake bit for an interrupt + * @d: irq_data + * + * For chips where the wake from suspend functionality is not + * configured in a separate register and the wakeup active state is + * just stored in a bitmask. + */ +int irq_gc_set_wake(struct irq_data *d, unsigned int on) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + if (!(mask & gc->wake_enabled)) + return -EINVAL; + + irq_gc_lock(gc); + if (on) + gc->wake_active |= mask; + else + gc->wake_active &= ~mask; + irq_gc_unlock(gc); + return 0; +} + +/** + * irq_alloc_generic_chip - Allocate a generic chip and initialize it + * @name: Name of the irq chip + * @num_ct: Number of irq_chip_type instances associated with this + * @irq_base: Interrupt base nr for this chip + * @reg_base: Register base address (virtual) + * @handler: Default flow handler associated with this chip + * + * Returns an initialized irq_chip_generic structure. The chip defaults + * to the primary (index 0) irq_chip_type and @handler + */ +struct irq_chip_generic * +irq_alloc_generic_chip(const char *name, int num_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler) +{ + struct irq_chip_generic *gc; + unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); + + gc = kzalloc(sz, GFP_KERNEL); + if (gc) { + raw_spin_lock_init(&gc->lock); + gc->num_ct = num_ct; + gc->irq_base = irq_base; + gc->reg_base = reg_base; + gc->chip_types->chip.name = name; + gc->chip_types->handler = handler; + } + return gc; +} + +/* + * Separate lockdep class for interrupt chip which can nest irq_desc + * lock. + */ +static struct lock_class_key irq_nested_lock_class; + +/** + * irq_setup_generic_chip - Setup a range of interrupts with a generic chip + * @gc: Generic irq chip holding all data + * @msk: Bitmask holding the irqs to initialize relative to gc->irq_base + * @flags: Flags for initialization + * @clr: IRQ_* bits to clear + * @set: IRQ_* bits to set + * + * Set up max. 32 interrupts starting from gc->irq_base. Note, this + * initializes all interrupts to the primary irq_chip_type and its + * associated handler. 
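As a usage sketch for the irq_alloc_generic_chip()/irq_setup_generic_chip() pair (the setup body continues just below): the chip name, interrupt base, and register offsets here are made up for illustration.

	#include <linux/io.h>
	#include <linux/irq.h>

	#define EXAMPLE_IRQ_BASE	64		/* hypothetical */

	static void __init example_init_irq_chip(void __iomem *reg_base)
	{
		struct irq_chip_generic *gc;
		struct irq_chip_type *ct;

		gc = irq_alloc_generic_chip("EXAMPLE", 1, EXAMPLE_IRQ_BASE,
					    reg_base, handle_level_irq);
		if (!gc)
			return;

		ct = gc->chip_types;
		ct->regs.mask = 0x04;			/* made-up offsets */
		ct->regs.ack  = 0x08;
		ct->chip.irq_mask = irq_gc_mask_set_bit;
		ct->chip.irq_unmask = irq_gc_mask_clr_bit;
		ct->chip.irq_ack = irq_gc_ack;

		/* 32 interrupts, prime the mask cache, allow request_irq() */
		irq_setup_generic_chip(gc, 0xffffffff, IRQ_GC_INIT_MASK_CACHE,
				       IRQ_NOREQUEST, 0);
	}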
+ */ +void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, + enum irq_gc_flags flags, unsigned int clr, + unsigned int set) +{ + struct irq_chip_type *ct = gc->chip_types; + unsigned int i; + + raw_spin_lock(&gc_lock); + list_add_tail(&gc->list, &gc_list); + raw_spin_unlock(&gc_lock); + + /* Init mask cache ? */ + if (flags & IRQ_GC_INIT_MASK_CACHE) + gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); + + for (i = gc->irq_base; msk; msk >>= 1, i++) { + if (!msk & 0x01) + continue; + + if (flags & IRQ_GC_INIT_NESTED_LOCK) + irq_set_lockdep_class(i, &irq_nested_lock_class); + + irq_set_chip_and_handler(i, &ct->chip, ct->handler); + irq_set_chip_data(i, gc); + irq_modify_status(i, clr, set); + } + gc->irq_cnt = i - gc->irq_base; +} + +/** + * irq_setup_alt_chip - Switch to alternative chip + * @d: irq_data for this interrupt + * @type Flow type to be initialized + * + * Only to be called from chip->irq_set_type() callbacks. + */ +int irq_setup_alt_chip(struct irq_data *d, unsigned int type) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = gc->chip_types; + unsigned int i; + + for (i = 0; i < gc->num_ct; i++, ct++) { + if (ct->type & type) { + d->chip = &ct->chip; + irq_data_to_desc(d)->handle_irq = ct->handler; + return 0; + } + } + return -EINVAL; +} + +/** + * irq_remove_generic_chip - Remove a chip + * @gc: Generic irq chip holding all data + * @msk: Bitmask holding the irqs to initialize relative to gc->irq_base + * @clr: IRQ_* bits to clear + * @set: IRQ_* bits to set + * + * Remove up to 32 interrupts starting from gc->irq_base. + */ +void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, + unsigned int clr, unsigned int set) +{ + unsigned int i = gc->irq_base; + + raw_spin_lock(&gc_lock); + list_del(&gc->list); + raw_spin_unlock(&gc_lock); + + for (; msk; msk >>= 1, i++) { + if (!msk & 0x01) + continue; + + /* Remove handler first. 
That will mask the irq line */ + irq_set_handler(i, NULL); + irq_set_chip(i, &no_irq_chip); + irq_set_chip_data(i, NULL); + irq_modify_status(i, clr, set); + } +} + +#ifdef CONFIG_PM +static int irq_gc_suspend(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_suspend) + ct->chip.irq_suspend(irq_get_irq_data(gc->irq_base)); + } + return 0; +} + +static void irq_gc_resume(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_resume) + ct->chip.irq_resume(irq_get_irq_data(gc->irq_base)); + } +} +#else +#define irq_gc_suspend NULL +#define irq_gc_resume NULL +#endif + +static void irq_gc_shutdown(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_pm_shutdown) + ct->chip.irq_pm_shutdown(irq_get_irq_data(gc->irq_base)); + } +} + +static struct syscore_ops irq_gc_syscore_ops = { + .suspend = irq_gc_suspend, + .resume = irq_gc_resume, + .shutdown = irq_gc_shutdown, +}; + +static int __init irq_gc_init_ops(void) +{ + register_syscore_ops(&irq_gc_syscore_ops); + return 0; +} +device_initcall(irq_gc_init_ops); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 2c039c9b938..886e80347b3 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -22,7 +22,7 @@ */ static struct lock_class_key irq_desc_lock_class; -#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) +#if defined(CONFIG_SMP) static void __init init_irq_default_affinity(void) { alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); @@ -290,6 +290,22 @@ static int irq_expand_nr_irqs(unsigned int nr) #endif /* !CONFIG_SPARSE_IRQ */ +/** + * generic_handle_irq - Invoke the handler for a particular irq + * @irq: The irq number to handle + * + */ +int generic_handle_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc) + return -EINVAL; + generic_handle_irq_desc(irq, desc); + return 0; +} +EXPORT_SYMBOL_GPL(generic_handle_irq); + /* Dynamic interrupt handling */ /** @@ -311,6 +327,7 @@ void irq_free_descs(unsigned int from, unsigned int cnt) bitmap_clear(allocated_irqs, from, cnt); mutex_unlock(&sparse_irq_lock); } +EXPORT_SYMBOL_GPL(irq_free_descs); /** * irq_alloc_descs - allocate and initialize a range of irq descriptors @@ -351,6 +368,7 @@ err: mutex_unlock(&sparse_irq_lock); return ret; } +EXPORT_SYMBOL_GPL(irq_alloc_descs); /** * irq_reserve_irqs - mark irqs allocated @@ -430,7 +448,6 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; } -#ifdef CONFIG_GENERIC_HARDIRQS unsigned int kstat_irqs(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -443,4 +460,3 @@ unsigned int kstat_irqs(unsigned int irq) sum += *per_cpu_ptr(desc->kstat_irqs, cpu); return sum; } -#endif /* CONFIG_GENERIC_HARDIRQS */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 07c1611f389..f7ce0021e1c 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -900,7 +900,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ new->handler = irq_nested_primary_handler; } else { - irq_setup_forced_threading(new); + if (irq_settings_can_thread(desc)) + irq_setup_forced_threading(new); } /* diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h index 0d91730b633..f1667833d44 100644 --- a/kernel/irq/settings.h +++ 
b/kernel/irq/settings.h @@ -8,6 +8,7 @@ enum { _IRQ_LEVEL = IRQ_LEVEL, _IRQ_NOPROBE = IRQ_NOPROBE, _IRQ_NOREQUEST = IRQ_NOREQUEST, + _IRQ_NOTHREAD = IRQ_NOTHREAD, _IRQ_NOAUTOEN = IRQ_NOAUTOEN, _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT, _IRQ_NO_BALANCING = IRQ_NO_BALANCING, @@ -20,6 +21,7 @@ enum { #define IRQ_LEVEL GOT_YOU_MORON #define IRQ_NOPROBE GOT_YOU_MORON #define IRQ_NOREQUEST GOT_YOU_MORON +#define IRQ_NOTHREAD GOT_YOU_MORON #define IRQ_NOAUTOEN GOT_YOU_MORON #define IRQ_NESTED_THREAD GOT_YOU_MORON #undef IRQF_MODIFY_MASK @@ -94,6 +96,21 @@ static inline void irq_settings_set_norequest(struct irq_desc *desc) desc->status_use_accessors |= _IRQ_NOREQUEST; } +static inline bool irq_settings_can_thread(struct irq_desc *desc) +{ + return !(desc->status_use_accessors & _IRQ_NOTHREAD); +} + +static inline void irq_settings_clr_nothread(struct irq_desc *desc) +{ + desc->status_use_accessors &= ~_IRQ_NOTHREAD; +} + +static inline void irq_settings_set_nothread(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_NOTHREAD; +} + static inline bool irq_settings_can_probe(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOPROBE); diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 3b79bd93833..74d1c099fbd 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -2,43 +2,23 @@ * jump label support * * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> + * Copyright (C) 2011 Peter Zijlstra <pzijlstr@redhat.com> * */ -#include <linux/jump_label.h> #include <linux/memory.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/list.h> -#include <linux/jhash.h> #include <linux/slab.h> #include <linux/sort.h> #include <linux/err.h> +#include <linux/jump_label.h> #ifdef HAVE_JUMP_LABEL -#define JUMP_LABEL_HASH_BITS 6 -#define JUMP_LABEL_TABLE_SIZE (1 << JUMP_LABEL_HASH_BITS) -static struct hlist_head jump_label_table[JUMP_LABEL_TABLE_SIZE]; - /* mutex to protect coming/going of the the jump_label table */ static DEFINE_MUTEX(jump_label_mutex); -struct jump_label_entry { - struct hlist_node hlist; - struct jump_entry *table; - int nr_entries; - /* hang modules off here */ - struct hlist_head modules; - unsigned long key; -}; - -struct jump_label_module_entry { - struct hlist_node hlist; - struct jump_entry *table; - int nr_entries; - struct module *mod; -}; - void jump_label_lock(void) { mutex_lock(&jump_label_mutex); @@ -49,6 +29,11 @@ void jump_label_unlock(void) mutex_unlock(&jump_label_mutex); } +bool jump_label_enabled(struct jump_label_key *key) +{ + return !!atomic_read(&key->enabled); +} + static int jump_label_cmp(const void *a, const void *b) { const struct jump_entry *jea = a; @@ -64,7 +49,7 @@ static int jump_label_cmp(const void *a, const void *b) } static void -sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop) +jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) { unsigned long size; @@ -73,118 +58,25 @@ sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop) sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); } -static struct jump_label_entry *get_jump_label_entry(jump_label_t key) -{ - struct hlist_head *head; - struct hlist_node *node; - struct jump_label_entry *e; - u32 hash = jhash((void *)&key, sizeof(jump_label_t), 0); - - head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (key == e->key) - return e; - } - return NULL; -} +static void jump_label_update(struct jump_label_key *key, int 
enable); -static struct jump_label_entry * -add_jump_label_entry(jump_label_t key, int nr_entries, struct jump_entry *table) +void jump_label_inc(struct jump_label_key *key) { - struct hlist_head *head; - struct jump_label_entry *e; - u32 hash; - - e = get_jump_label_entry(key); - if (e) - return ERR_PTR(-EEXIST); - - e = kmalloc(sizeof(struct jump_label_entry), GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - - hash = jhash((void *)&key, sizeof(jump_label_t), 0); - head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; - e->key = key; - e->table = table; - e->nr_entries = nr_entries; - INIT_HLIST_HEAD(&(e->modules)); - hlist_add_head(&e->hlist, head); - return e; -} + if (atomic_inc_not_zero(&key->enabled)) + return; -static int -build_jump_label_hashtable(struct jump_entry *start, struct jump_entry *stop) -{ - struct jump_entry *iter, *iter_begin; - struct jump_label_entry *entry; - int count; - - sort_jump_label_entries(start, stop); - iter = start; - while (iter < stop) { - entry = get_jump_label_entry(iter->key); - if (!entry) { - iter_begin = iter; - count = 0; - while ((iter < stop) && - (iter->key == iter_begin->key)) { - iter++; - count++; - } - entry = add_jump_label_entry(iter_begin->key, - count, iter_begin); - if (IS_ERR(entry)) - return PTR_ERR(entry); - } else { - WARN_ONCE(1, KERN_ERR "build_jump_hashtable: unexpected entry!\n"); - return -1; - } - } - return 0; + jump_label_lock(); + if (atomic_add_return(1, &key->enabled) == 1) + jump_label_update(key, JUMP_LABEL_ENABLE); + jump_label_unlock(); } -/*** - * jump_label_update - update jump label text - * @key - key value associated with a a jump label - * @type - enum set to JUMP_LABEL_ENABLE or JUMP_LABEL_DISABLE - * - * Will enable/disable the jump for jump label @key, depending on the - * value of @type. 
- * - */ - -void jump_label_update(unsigned long key, enum jump_label_type type) +void jump_label_dec(struct jump_label_key *key) { - struct jump_entry *iter; - struct jump_label_entry *entry; - struct hlist_node *module_node; - struct jump_label_module_entry *e_module; - int count; + if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) + return; - jump_label_lock(); - entry = get_jump_label_entry((jump_label_t)key); - if (entry) { - count = entry->nr_entries; - iter = entry->table; - while (count--) { - if (kernel_text_address(iter->code)) - arch_jump_label_transform(iter, type); - iter++; - } - /* eanble/disable jump labels in modules */ - hlist_for_each_entry(e_module, module_node, &(entry->modules), - hlist) { - count = e_module->nr_entries; - iter = e_module->table; - while (count--) { - if (iter->key && - kernel_text_address(iter->code)) - arch_jump_label_transform(iter, type); - iter++; - } - } - } + jump_label_update(key, JUMP_LABEL_DISABLE); jump_label_unlock(); } @@ -197,77 +89,33 @@ static int addr_conflict(struct jump_entry *entry, void *start, void *end) return 0; } -#ifdef CONFIG_MODULES - -static int module_conflict(void *start, void *end) +static int __jump_label_text_reserved(struct jump_entry *iter_start, + struct jump_entry *iter_stop, void *start, void *end) { - struct hlist_head *head; - struct hlist_node *node, *node_next, *module_node, *module_node_next; - struct jump_label_entry *e; - struct jump_label_module_entry *e_module; struct jump_entry *iter; - int i, count; - int conflict = 0; - - for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { - head = &jump_label_table[i]; - hlist_for_each_entry_safe(e, node, node_next, head, hlist) { - hlist_for_each_entry_safe(e_module, module_node, - module_node_next, - &(e->modules), hlist) { - count = e_module->nr_entries; - iter = e_module->table; - while (count--) { - if (addr_conflict(iter, start, end)) { - conflict = 1; - goto out; - } - iter++; - } - } - } - } -out: - return conflict; -} - -#endif - -/*** - * jump_label_text_reserved - check if addr range is reserved - * @start: start text addr - * @end: end text addr - * - * checks if the text addr located between @start and @end - * overlaps with any of the jump label patch addresses. Code - * that wants to modify kernel text should first verify that - * it does not overlap with any of the jump label addresses. - * Caller must hold jump_label_mutex. - * - * returns 1 if there is an overlap, 0 otherwise - */ -int jump_label_text_reserved(void *start, void *end) -{ - struct jump_entry *iter; - struct jump_entry *iter_start = __start___jump_table; - struct jump_entry *iter_stop = __start___jump_table; - int conflict = 0; iter = iter_start; while (iter < iter_stop) { - if (addr_conflict(iter, start, end)) { - conflict = 1; - goto out; - } + if (addr_conflict(iter, start, end)) + return 1; iter++; } - /* now check modules */ -#ifdef CONFIG_MODULES - conflict = module_conflict(start, end); -#endif -out: - return conflict; + return 0; +} + +static void __jump_label_update(struct jump_label_key *key, + struct jump_entry *entry, int enable) +{ + for (; entry->key == (jump_label_t)(unsigned long)key; entry++) { + /* + * entry->code set to 0 invalidates module init text sections + * kernel_text_address() verifies we are not in core kernel + * init code, see jump_label_invalidate_module_init(). 
+ */ + if (entry->code && kernel_text_address(entry->code)) + arch_jump_label_transform(entry, enable); + } } /* @@ -277,142 +125,173 @@ void __weak arch_jump_label_text_poke_early(jump_label_t addr) { } -static __init int init_jump_label(void) +static __init int jump_label_init(void) { - int ret; struct jump_entry *iter_start = __start___jump_table; struct jump_entry *iter_stop = __stop___jump_table; + struct jump_label_key *key = NULL; struct jump_entry *iter; jump_label_lock(); - ret = build_jump_label_hashtable(__start___jump_table, - __stop___jump_table); - iter = iter_start; - while (iter < iter_stop) { + jump_label_sort_entries(iter_start, iter_stop); + + for (iter = iter_start; iter < iter_stop; iter++) { arch_jump_label_text_poke_early(iter->code); - iter++; + if (iter->key == (jump_label_t)(unsigned long)key) + continue; + + key = (struct jump_label_key *)(unsigned long)iter->key; + atomic_set(&key->enabled, 0); + key->entries = iter; +#ifdef CONFIG_MODULES + key->next = NULL; +#endif } jump_label_unlock(); - return ret; + + return 0; } -early_initcall(init_jump_label); +early_initcall(jump_label_init); #ifdef CONFIG_MODULES -static struct jump_label_module_entry * -add_jump_label_module_entry(struct jump_label_entry *entry, - struct jump_entry *iter_begin, - int count, struct module *mod) +struct jump_label_mod { + struct jump_label_mod *next; + struct jump_entry *entries; + struct module *mod; +}; + +static int __jump_label_mod_text_reserved(void *start, void *end) +{ + struct module *mod; + + mod = __module_text_address((unsigned long)start); + if (!mod) + return 0; + + WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); + + return __jump_label_text_reserved(mod->jump_entries, + mod->jump_entries + mod->num_jump_entries, + start, end); +} + +static void __jump_label_mod_update(struct jump_label_key *key, int enable) +{ + struct jump_label_mod *mod = key->next; + + while (mod) { + __jump_label_update(key, mod->entries, enable); + mod = mod->next; + } +} + +/*** + * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop() + * @mod: module to patch + * + * Allow for run-time selection of the optimal nops. Before the module + * loads patch these with arch_get_jump_label_nop(), which is specified by + * the arch specific jump label code. 
+ */ +void jump_label_apply_nops(struct module *mod) { - struct jump_label_module_entry *e; - - e = kmalloc(sizeof(struct jump_label_module_entry), GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - e->mod = mod; - e->nr_entries = count; - e->table = iter_begin; - hlist_add_head(&e->hlist, &entry->modules); - return e; + struct jump_entry *iter_start = mod->jump_entries; + struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; + struct jump_entry *iter; + + /* if the module doesn't have jump label entries, just return */ + if (iter_start == iter_stop) + return; + + for (iter = iter_start; iter < iter_stop; iter++) + arch_jump_label_text_poke_early(iter->code); } -static int add_jump_label_module(struct module *mod) +static int jump_label_add_module(struct module *mod) { - struct jump_entry *iter, *iter_begin; - struct jump_label_entry *entry; - struct jump_label_module_entry *module_entry; - int count; + struct jump_entry *iter_start = mod->jump_entries; + struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; + struct jump_entry *iter; + struct jump_label_key *key = NULL; + struct jump_label_mod *jlm; /* if the module doesn't have jump label entries, just return */ - if (!mod->num_jump_entries) + if (iter_start == iter_stop) return 0; - sort_jump_label_entries(mod->jump_entries, - mod->jump_entries + mod->num_jump_entries); - iter = mod->jump_entries; - while (iter < mod->jump_entries + mod->num_jump_entries) { - entry = get_jump_label_entry(iter->key); - iter_begin = iter; - count = 0; - while ((iter < mod->jump_entries + mod->num_jump_entries) && - (iter->key == iter_begin->key)) { - iter++; - count++; - } - if (!entry) { - entry = add_jump_label_entry(iter_begin->key, 0, NULL); - if (IS_ERR(entry)) - return PTR_ERR(entry); + jump_label_sort_entries(iter_start, iter_stop); + + for (iter = iter_start; iter < iter_stop; iter++) { + if (iter->key == (jump_label_t)(unsigned long)key) + continue; + + key = (struct jump_label_key *)(unsigned long)iter->key; + + if (__module_address(iter->key) == mod) { + atomic_set(&key->enabled, 0); + key->entries = iter; + key->next = NULL; + continue; } - module_entry = add_jump_label_module_entry(entry, iter_begin, - count, mod); - if (IS_ERR(module_entry)) - return PTR_ERR(module_entry); + + jlm = kzalloc(sizeof(struct jump_label_mod), GFP_KERNEL); + if (!jlm) + return -ENOMEM; + + jlm->mod = mod; + jlm->entries = iter; + jlm->next = key->next; + key->next = jlm; + + if (jump_label_enabled(key)) + __jump_label_update(key, iter, JUMP_LABEL_ENABLE); } + return 0; } -static void remove_jump_label_module(struct module *mod) +static void jump_label_del_module(struct module *mod) { - struct hlist_head *head; - struct hlist_node *node, *node_next, *module_node, *module_node_next; - struct jump_label_entry *e; - struct jump_label_module_entry *e_module; - int i; + struct jump_entry *iter_start = mod->jump_entries; + struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; + struct jump_entry *iter; + struct jump_label_key *key = NULL; + struct jump_label_mod *jlm, **prev; - /* if the module doesn't have jump label entries, just return */ - if (!mod->num_jump_entries) - return; + for (iter = iter_start; iter < iter_stop; iter++) { + if (iter->key == (jump_label_t)(unsigned long)key) + continue; + + key = (struct jump_label_key *)(unsigned long)iter->key; + + if (__module_address(iter->key) == mod) + continue; + + prev = &key->next; + jlm = key->next; - for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { - head = &jump_label_table[i]; 
- hlist_for_each_entry_safe(e, node, node_next, head, hlist) { - hlist_for_each_entry_safe(e_module, module_node, - module_node_next, - &(e->modules), hlist) { - if (e_module->mod == mod) { - hlist_del(&e_module->hlist); - kfree(e_module); - } - } - if (hlist_empty(&e->modules) && (e->nr_entries == 0)) { - hlist_del(&e->hlist); - kfree(e); - } + while (jlm && jlm->mod != mod) { + prev = &jlm->next; + jlm = jlm->next; + } + + if (jlm) { + *prev = jlm->next; + kfree(jlm); } } } -static void remove_jump_label_module_init(struct module *mod) +static void jump_label_invalidate_module_init(struct module *mod) { - struct hlist_head *head; - struct hlist_node *node, *node_next, *module_node, *module_node_next; - struct jump_label_entry *e; - struct jump_label_module_entry *e_module; + struct jump_entry *iter_start = mod->jump_entries; + struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; struct jump_entry *iter; - int i, count; - - /* if the module doesn't have jump label entries, just return */ - if (!mod->num_jump_entries) - return; - for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { - head = &jump_label_table[i]; - hlist_for_each_entry_safe(e, node, node_next, head, hlist) { - hlist_for_each_entry_safe(e_module, module_node, - module_node_next, - &(e->modules), hlist) { - if (e_module->mod != mod) - continue; - count = e_module->nr_entries; - iter = e_module->table; - while (count--) { - if (within_module_init(iter->code, mod)) - iter->key = 0; - iter++; - } - } - } + for (iter = iter_start; iter < iter_stop; iter++) { + if (within_module_init(iter->code, mod)) + iter->code = 0; } } @@ -426,59 +305,77 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, switch (val) { case MODULE_STATE_COMING: jump_label_lock(); - ret = add_jump_label_module(mod); + ret = jump_label_add_module(mod); if (ret) - remove_jump_label_module(mod); + jump_label_del_module(mod); jump_label_unlock(); break; case MODULE_STATE_GOING: jump_label_lock(); - remove_jump_label_module(mod); + jump_label_del_module(mod); jump_label_unlock(); break; case MODULE_STATE_LIVE: jump_label_lock(); - remove_jump_label_module_init(mod); + jump_label_invalidate_module_init(mod); jump_label_unlock(); break; } - return ret; -} -/*** - * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop() - * @mod: module to patch - * - * Allow for run-time selection of the optimal nops. Before the module - * loads patch these with arch_get_jump_label_nop(), which is specified by - * the arch specific jump label code. 
- */ -void jump_label_apply_nops(struct module *mod) -{ - struct jump_entry *iter; - - /* if the module doesn't have jump label entries, just return */ - if (!mod->num_jump_entries) - return; - - iter = mod->jump_entries; - while (iter < mod->jump_entries + mod->num_jump_entries) { - arch_jump_label_text_poke_early(iter->code); - iter++; - } + return notifier_from_errno(ret); } struct notifier_block jump_label_module_nb = { .notifier_call = jump_label_module_notify, - .priority = 0, + .priority = 1, /* higher than tracepoints */ }; -static __init int init_jump_label_module(void) +static __init int jump_label_init_module(void) { return register_module_notifier(&jump_label_module_nb); } -early_initcall(init_jump_label_module); +early_initcall(jump_label_init_module); #endif /* CONFIG_MODULES */ +/*** + * jump_label_text_reserved - check if addr range is reserved + * @start: start text addr + * @end: end text addr + * + * checks if the text addr located between @start and @end + * overlaps with any of the jump label patch addresses. Code + * that wants to modify kernel text should first verify that + * it does not overlap with any of the jump label addresses. + * Caller must hold jump_label_mutex. + * + * returns 1 if there is an overlap, 0 otherwise + */ +int jump_label_text_reserved(void *start, void *end) +{ + int ret = __jump_label_text_reserved(__start___jump_table, + __stop___jump_table, start, end); + + if (ret) + return ret; + +#ifdef CONFIG_MODULES + ret = __jump_label_mod_text_reserved(start, end); +#endif + return ret; +} + +static void jump_label_update(struct jump_label_key *key, int enable) +{ + struct jump_entry *entry = key->entries; + + /* if there are no users, entry can be NULL */ + if (entry) + __jump_label_update(key, entry, enable); + +#ifdef CONFIG_MODULES + __jump_label_mod_update(key, enable); +#endif +} + #endif diff --git a/kernel/kexec.c b/kernel/kexec.c index 87b77de03dd..8d814cbc810 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1531,13 +1531,7 @@ int kernel_kexec(void) if (error) goto Enable_cpus; local_irq_disable(); - /* Suspend system devices */ - error = sysdev_suspend(PMSG_FREEZE); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (error) goto Enable_irqs; } else @@ -1553,7 +1547,6 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { syscore_resume(); - sysdev_resume(); Enable_irqs: local_irq_enable(); Enable_cpus: diff --git a/kernel/kmod.c b/kernel/kmod.c index 9cd0591c96a..5ae0ff38425 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -245,7 +245,6 @@ static void __call_usermodehelper(struct work_struct *work) } } -#ifdef CONFIG_PM_SLEEP /* * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY * (used for preventing user land processes from being created after the user @@ -301,6 +300,15 @@ void usermodehelper_enable(void) usermodehelper_disabled = 0; } +/** + * usermodehelper_is_disabled - check if new helpers are allowed to be started + */ +bool usermodehelper_is_disabled(void) +{ + return usermodehelper_disabled; +} +EXPORT_SYMBOL_GPL(usermodehelper_is_disabled); + static void helper_lock(void) { atomic_inc(&running_helpers); @@ -312,12 +320,6 @@ static void helper_unlock(void) if (atomic_dec_and_test(&running_helpers)) wake_up(&running_helpers_waitq); } -#else /* CONFIG_PM_SLEEP */ -#define usermodehelper_disabled 0 - -static inline void helper_lock(void) {} -static inline void helper_unlock(void) {} -#endif /* CONFIG_PM_SLEEP 
*/ /** * call_usermodehelper_setup - prepare to call a usermode helper diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 53a68956f13..63437d065ac 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -490,6 +490,18 @@ void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS]) usage[i] = '\0'; } +static int __print_lock_name(struct lock_class *class) +{ + char str[KSYM_NAME_LEN]; + const char *name; + + name = class->name; + if (!name) + name = __get_key_name(class->key, str); + + return printk("%s", name); +} + static void print_lock_name(struct lock_class *class) { char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS]; @@ -1053,6 +1065,56 @@ print_circular_bug_entry(struct lock_list *target, int depth) return 0; } +static void +print_circular_lock_scenario(struct held_lock *src, + struct held_lock *tgt, + struct lock_list *prt) +{ + struct lock_class *source = hlock_class(src); + struct lock_class *target = hlock_class(tgt); + struct lock_class *parent = prt->class; + + /* + * A direct locking problem where unsafe_class lock is taken + * directly by safe_class lock, then all we need to show + * is the deadlock scenario, as it is obvious that the + * unsafe lock is taken under the safe lock. + * + * But if there is a chain instead, where the safe lock takes + * an intermediate lock (middle_class) where this lock is + * not the same as the safe lock, then the lock chain is + * used to describe the problem. Otherwise we would need + * to show a different CPU case for each link in the chain + * from the safe_class lock to the unsafe_class lock. + */ + if (parent != source) { + printk("Chain exists of:\n "); + __print_lock_name(source); + printk(" --> "); + __print_lock_name(parent); + printk(" --> "); + __print_lock_name(target); + printk("\n\n"); + } + + printk(" Possible unsafe locking scenario:\n\n"); + printk(" CPU0 CPU1\n"); + printk(" ---- ----\n"); + printk(" lock("); + __print_lock_name(target); + printk(");\n"); + printk(" lock("); + __print_lock_name(parent); + printk(");\n"); + printk(" lock("); + __print_lock_name(target); + printk(");\n"); + printk(" lock("); + __print_lock_name(source); + printk(");\n"); + printk("\n *** DEADLOCK ***\n\n"); +} + /* * When a circular dependency is detected, print the * header first: @@ -1096,6 +1158,7 @@ static noinline int print_circular_bug(struct lock_list *this, { struct task_struct *curr = current; struct lock_list *parent; + struct lock_list *first_parent; int depth; if (!debug_locks_off_graph_unlock() || debug_locks_silent) @@ -1109,6 +1172,7 @@ static noinline int print_circular_bug(struct lock_list *this, print_circular_bug_header(target, depth, check_src, check_tgt); parent = get_lock_parent(target); + first_parent = parent; while (parent) { print_circular_bug_entry(parent, --depth); @@ -1116,6 +1180,9 @@ static noinline int print_circular_bug(struct lock_list *this, } printk("\nother info that might help us debug this:\n\n"); + print_circular_lock_scenario(check_src, check_tgt, + first_parent); + lockdep_print_held_locks(curr); printk("\nstack backtrace:\n"); @@ -1314,7 +1381,7 @@ print_shortest_lock_dependencies(struct lock_list *leaf, printk("\n"); if (depth == 0 && (entry != root)) { - printk("lockdep:%s bad BFS generated tree\n", __func__); + printk("lockdep:%s bad path found in chain graph\n", __func__); break; } @@ -1325,6 +1392,62 @@ print_shortest_lock_dependencies(struct lock_list *leaf, return; } +static void +print_irq_lock_scenario(struct lock_list *safe_entry, + struct lock_list *unsafe_entry, + struct 
lock_class *prev_class, + struct lock_class *next_class) +{ + struct lock_class *safe_class = safe_entry->class; + struct lock_class *unsafe_class = unsafe_entry->class; + struct lock_class *middle_class = prev_class; + + if (middle_class == safe_class) + middle_class = next_class; + + /* + * A direct locking problem where unsafe_class lock is taken + * directly by safe_class lock, then all we need to show + * is the deadlock scenario, as it is obvious that the + * unsafe lock is taken under the safe lock. + * + * But if there is a chain instead, where the safe lock takes + * an intermediate lock (middle_class) where this lock is + * not the same as the safe lock, then the lock chain is + * used to describe the problem. Otherwise we would need + * to show a different CPU case for each link in the chain + * from the safe_class lock to the unsafe_class lock. + */ + if (middle_class != unsafe_class) { + printk("Chain exists of:\n "); + __print_lock_name(safe_class); + printk(" --> "); + __print_lock_name(middle_class); + printk(" --> "); + __print_lock_name(unsafe_class); + printk("\n\n"); + } + + printk(" Possible interrupt unsafe locking scenario:\n\n"); + printk(" CPU0 CPU1\n"); + printk(" ---- ----\n"); + printk(" lock("); + __print_lock_name(unsafe_class); + printk(");\n"); + printk(" local_irq_disable();\n"); + printk(" lock("); + __print_lock_name(safe_class); + printk(");\n"); + printk(" lock("); + __print_lock_name(middle_class); + printk(");\n"); + printk(" <Interrupt>\n"); + printk(" lock("); + __print_lock_name(safe_class); + printk(");\n"); + printk("\n *** DEADLOCK ***\n\n"); +} + static int print_bad_irq_dependency(struct task_struct *curr, struct lock_list *prev_root, @@ -1376,6 +1499,9 @@ print_bad_irq_dependency(struct task_struct *curr, print_stack_trace(forwards_entry->class->usage_traces + bit2, 1); printk("\nother info that might help us debug this:\n\n"); + print_irq_lock_scenario(backwards_entry, forwards_entry, + hlock_class(prev), hlock_class(next)); + lockdep_print_held_locks(curr); printk("\nthe dependencies between %s-irq-safe lock", irqclass); @@ -1539,6 +1665,26 @@ static inline void inc_chains(void) #endif +static void +print_deadlock_scenario(struct held_lock *nxt, + struct held_lock *prv) +{ + struct lock_class *next = hlock_class(nxt); + struct lock_class *prev = hlock_class(prv); + + printk(" Possible unsafe locking scenario:\n\n"); + printk(" CPU0\n"); + printk(" ----\n"); + printk(" lock("); + __print_lock_name(prev); + printk(");\n"); + printk(" lock("); + __print_lock_name(next); + printk(");\n"); + printk("\n *** DEADLOCK ***\n\n"); + printk(" May be due to missing lock nesting notation\n\n"); +} + static int print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, struct held_lock *next) @@ -1557,6 +1703,7 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, print_lock(prev); printk("\nother info that might help us debug this:\n"); + print_deadlock_scenario(next, prev); lockdep_print_held_locks(curr); printk("\nstack backtrace:\n"); @@ -1826,7 +1973,7 @@ static inline int lookup_chain_cache(struct task_struct *curr, struct list_head *hash_head = chainhashentry(chain_key); struct lock_chain *chain; struct held_lock *hlock_curr, *hlock_next; - int i, j, n, cn; + int i, j; if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return 0; @@ -1886,15 +2033,9 @@ cache_hit: } i++; chain->depth = curr->lockdep_depth + 1 - i; - cn = nr_chain_hlocks; - while (cn + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS) { - n = cmpxchg(&nr_chain_hlocks, cn, 
cn + chain->depth); - if (n == cn) - break; - cn = n; - } - if (likely(cn + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { - chain->base = cn; + if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { + chain->base = nr_chain_hlocks; + nr_chain_hlocks += chain->depth; for (j = 0; j < chain->depth - 1; j++, i++) { int lock_id = curr->held_locks[i].class_idx - 1; chain_hlocks[chain->base + j] = lock_id; @@ -2011,6 +2152,24 @@ static void check_chain_key(struct task_struct *curr) #endif } +static void +print_usage_bug_scenario(struct held_lock *lock) +{ + struct lock_class *class = hlock_class(lock); + + printk(" Possible unsafe locking scenario:\n\n"); + printk(" CPU0\n"); + printk(" ----\n"); + printk(" lock("); + __print_lock_name(class); + printk(");\n"); + printk(" <Interrupt>\n"); + printk(" lock("); + __print_lock_name(class); + printk(");\n"); + printk("\n *** DEADLOCK ***\n\n"); +} + static int print_usage_bug(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) @@ -2039,6 +2198,8 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, print_irqtrace_events(curr); printk("\nother info that might help us debug this:\n"); + print_usage_bug_scenario(this); + lockdep_print_held_locks(curr); printk("\nstack backtrace:\n"); @@ -2073,6 +2234,10 @@ print_irq_inversion_bug(struct task_struct *curr, struct held_lock *this, int forwards, const char *irqclass) { + struct lock_list *entry = other; + struct lock_list *middle = NULL; + int depth; + if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; @@ -2091,6 +2256,25 @@ print_irq_inversion_bug(struct task_struct *curr, printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); printk("\nother info that might help us debug this:\n"); + + /* Find a middle lock (if one exists) */ + depth = get_lock_depth(other); + do { + if (depth == 0 && (entry != root)) { + printk("lockdep:%s bad path found in chain graph\n", __func__); + break; + } + middle = entry; + entry = get_lock_parent(entry); + depth--; + } while (entry && entry != root && (depth >= 0)); + if (forwards) + print_irq_lock_scenario(root, other, + middle ? middle->class : root->class, other->class); + else + print_irq_lock_scenario(other, root, + middle ? middle->class : other->class, root->class); + lockdep_print_held_locks(curr); printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n"); diff --git a/kernel/module.c b/kernel/module.c index d5938a5c19c..22879725678 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -57,6 +57,7 @@ #include <linux/kmemleak.h> #include <linux/jump_label.h> #include <linux/pfn.h> +#include <linux/bsearch.h> #define CREATE_TRACE_POINTS #include <trace/events/module.h> @@ -240,23 +241,24 @@ static bool each_symbol_in_section(const struct symsearch *arr, struct module *owner, bool (*fn)(const struct symsearch *syms, struct module *owner, - unsigned int symnum, void *data), + void *data), void *data) { - unsigned int i, j; + unsigned int j; for (j = 0; j < arrsize; j++) { - for (i = 0; i < arr[j].stop - arr[j].start; i++) - if (fn(&arr[j], owner, i, data)) - return true; + if (fn(&arr[j], owner, data)) + return true; } return false; } /* Returns true as soon as fn returns true, otherwise false. 
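Stepping back to the lockdep changes above: the new "Possible unsafe locking scenario" output describes the classic ABBA pattern, which in a self-contained illustration looks like this:

	#include <linux/mutex.h>

	static DEFINE_MUTEX(lock_a);
	static DEFINE_MUTEX(lock_b);

	static void cpu0_path(void)
	{
		mutex_lock(&lock_a);
		mutex_lock(&lock_b);	/* records the A -> B dependency */
		mutex_unlock(&lock_b);
		mutex_unlock(&lock_a);
	}

	static void cpu1_path(void)
	{
		mutex_lock(&lock_b);
		mutex_lock(&lock_a);	/* B -> A closes the cycle; the report fires */
		mutex_unlock(&lock_a);
		mutex_unlock(&lock_b);
	}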
*/ -bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, - unsigned int symnum, void *data), void *data) +bool each_symbol_section(bool (*fn)(const struct symsearch *arr, + struct module *owner, + void *data), + void *data) { struct module *mod; static const struct symsearch arr[] = { @@ -309,7 +311,7 @@ bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, } return false; } -EXPORT_SYMBOL_GPL(each_symbol); +EXPORT_SYMBOL_GPL(each_symbol_section); struct find_symbol_arg { /* Input */ @@ -323,15 +325,12 @@ struct find_symbol_arg { const struct kernel_symbol *sym; }; -static bool find_symbol_in_section(const struct symsearch *syms, - struct module *owner, - unsigned int symnum, void *data) +static bool check_symbol(const struct symsearch *syms, + struct module *owner, + unsigned int symnum, void *data) { struct find_symbol_arg *fsa = data; - if (strcmp(syms->start[symnum].name, fsa->name) != 0) - return false; - if (!fsa->gplok) { if (syms->licence == GPL_ONLY) return false; @@ -365,6 +364,30 @@ static bool find_symbol_in_section(const struct symsearch *syms, return true; } +static int cmp_name(const void *va, const void *vb) +{ + const char *a; + const struct kernel_symbol *b; + a = va; b = vb; + return strcmp(a, b->name); +} + +static bool find_symbol_in_section(const struct symsearch *syms, + struct module *owner, + void *data) +{ + struct find_symbol_arg *fsa = data; + struct kernel_symbol *sym; + + sym = bsearch(fsa->name, syms->start, syms->stop - syms->start, + sizeof(struct kernel_symbol), cmp_name); + + if (sym != NULL && check_symbol(syms, owner, sym - syms->start, data)) + return true; + + return false; +} + /* Find a symbol and return it, along with, (optional) crc and * (optional) module which owns it. Needs preempt disabled or module_mutex. 
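find_symbol_in_section() above depends on the exported symbol tables being sorted by name, which is what allows lib/bsearch to replace the old linear scan. The same pattern applied to a plain string table, with a hypothetical helper:

	#include <linux/bsearch.h>
	#include <linux/string.h>
	#include <linux/types.h>

	static int cmp_str(const void *key, const void *elt)
	{
		return strcmp(key, *(const char * const *)elt);
	}

	/* names[] must already be sorted, just as the linker lays out the
	 * __ksymtab* sections for cmp_name() above. */
	static bool example_name_is_known(const char *name,
					  const char * const *names, size_t n)
	{
		return bsearch(name, names, n, sizeof(*names), cmp_str) != NULL;
	}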
*/ const struct kernel_symbol *find_symbol(const char *name, @@ -379,7 +402,7 @@ const struct kernel_symbol *find_symbol(const char *name, fsa.gplok = gplok; fsa.warn = warn; - if (each_symbol(find_symbol_in_section, &fsa)) { + if (each_symbol_section(find_symbol_in_section, &fsa)) { if (owner) *owner = fsa.owner; if (crc) @@ -1607,27 +1630,28 @@ static void set_section_ro_nx(void *base, } } -/* Setting memory back to RW+NX before releasing it */ -void unset_section_ro_nx(struct module *mod, void *module_region) +static void unset_module_core_ro_nx(struct module *mod) { - unsigned long total_pages; - - if (mod->module_core == module_region) { - /* Set core as NX+RW */ - total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size); - set_memory_nx((unsigned long)mod->module_core, total_pages); - set_memory_rw((unsigned long)mod->module_core, total_pages); + set_page_attributes(mod->module_core + mod->core_text_size, + mod->module_core + mod->core_size, + set_memory_x); + set_page_attributes(mod->module_core, + mod->module_core + mod->core_ro_size, + set_memory_rw); +} - } else if (mod->module_init == module_region) { - /* Set init as NX+RW */ - total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size); - set_memory_nx((unsigned long)mod->module_init, total_pages); - set_memory_rw((unsigned long)mod->module_init, total_pages); - } +static void unset_module_init_ro_nx(struct module *mod) +{ + set_page_attributes(mod->module_init + mod->init_text_size, + mod->module_init + mod->init_size, + set_memory_x); + set_page_attributes(mod->module_init, + mod->module_init + mod->init_ro_size, + set_memory_rw); } /* Iterate through all modules and set each module's text as RW */ -void set_all_modules_text_rw() +void set_all_modules_text_rw(void) { struct module *mod; @@ -1648,7 +1672,7 @@ void set_all_modules_text_rw() } /* Iterate through all modules and set each module's text as RO */ -void set_all_modules_text_ro() +void set_all_modules_text_ro(void) { struct module *mod; @@ -1669,7 +1693,8 @@ void set_all_modules_text_ro() } #else static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { } -static inline void unset_section_ro_nx(struct module *mod, void *module_region) { } +static void unset_module_core_ro_nx(struct module *mod) { } +static void unset_module_init_ro_nx(struct module *mod) { } #endif /* Free a module, remove from lists, etc. 
*/ @@ -1696,7 +1721,7 @@ static void free_module(struct module *mod) destroy_params(mod->kp, mod->num_kp); /* This may be NULL, but that's OK */ - unset_section_ro_nx(mod, mod->module_init); + unset_module_init_ro_nx(mod); module_free(mod, mod->module_init); kfree(mod->args); percpu_modfree(mod); @@ -1705,7 +1730,7 @@ static void free_module(struct module *mod) lockdep_free_key_range(mod->module_core, mod->core_size); /* Finally, free the core (containing the module structure) */ - unset_section_ro_nx(mod, mod->module_core); + unset_module_core_ro_nx(mod); module_free(mod, mod->module_core); #ifdef CONFIG_MPU @@ -2030,11 +2055,8 @@ static const struct kernel_symbol *lookup_symbol(const char *name, const struct kernel_symbol *start, const struct kernel_symbol *stop) { - const struct kernel_symbol *ks = start; - for (; ks < stop; ks++) - if (strcmp(ks->name, name) == 0) - return ks; - return NULL; + return bsearch(name, start, stop - start, + sizeof(struct kernel_symbol), cmp_name); } static int is_exported(const char *name, unsigned long value, @@ -2931,10 +2953,11 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, mod->symtab = mod->core_symtab; mod->strtab = mod->core_strtab; #endif - unset_section_ro_nx(mod, mod->module_init); + unset_module_init_ro_nx(mod); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; + mod->init_ro_size = 0; mod->init_text_size = 0; mutex_unlock(&module_mutex); diff --git a/kernel/params.c b/kernel/params.c index 7ab388a48a2..ed72e133086 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -297,21 +297,15 @@ EXPORT_SYMBOL(param_ops_charp); int param_set_bool(const char *val, const struct kernel_param *kp) { bool v; + int ret; /* No equals means "set"... */ if (!val) val = "1"; /* One of =[yYnN01] */ - switch (val[0]) { - case 'y': case 'Y': case '1': - v = true; - break; - case 'n': case 'N': case '0': - v = false; - break; - default: - return -EINVAL; - } + ret = strtobool(val, &v); + if (ret) + return ret; if (kp->flags & KPARAM_ISBOOL) *(bool *)kp->arg = v; @@ -821,15 +815,18 @@ ssize_t __modver_version_show(struct module_attribute *mattr, return sprintf(buf, "%s\n", vattr->version); } -extern struct module_version_attribute __start___modver[], __stop___modver[]; +extern const struct module_version_attribute *__start___modver[]; +extern const struct module_version_attribute *__stop___modver[]; static void __init version_sysfs_builtin(void) { - const struct module_version_attribute *vattr; + const struct module_version_attribute **p; struct module_kobject *mk; int err; - for (vattr = __start___modver; vattr < __stop___modver; vattr++) { + for (p = __start___modver; p < __stop___modver; p++) { + const struct module_version_attribute *vattr = *p; + mk = locate_module_kobject(vattr->module_name); if (mk) { err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 6de9a8fc341..87f4d24b55b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -125,12 +125,6 @@ config PM_DEBUG code. This is helpful when debugging and reporting PM bugs, like suspend support. -config PM_VERBOSE - bool "Verbose Power Management debugging" - depends on PM_DEBUG - ---help--- - This option enables verbose messages from the Power Management code. 
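The params.c hunk earlier in this chunk switches param_set_bool() over to strtobool() from lib/string.c. A hedged sketch of calling the same helper directly, with a hypothetical parsing function:

	#include <linux/errno.h>
	#include <linux/string.h>
	#include <linux/types.h>

	/* Accepts y/Y/1 and n/N/0; anything else yields -EINVAL, matching
	 * the switch statement param_set_bool() just dropped. */
	static int example_parse_enable(const char *val, bool *enabled)
	{
		if (!val)		/* bare "param" means "param=1" */
			val = "1";
		return strtobool(val, enabled);
	}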
- config PM_ADVANCED_DEBUG bool "Extra PM attributes in sysfs for low-level debugging/testing" depends on PM_DEBUG @@ -229,3 +223,7 @@ config PM_OPP representing individual voltage domains and provides SOC implementations a ready to use framework to manage OPPs. For more information, read <file:Documentation/power/opp.txt> + +config PM_RUNTIME_CLK + def_bool y + depends on PM_RUNTIME && HAVE_CLK diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 50aae660174..f9bec56d882 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -272,12 +272,7 @@ static int create_image(int platform_mode) local_irq_disable(); - error = sysdev_suspend(PMSG_FREEZE); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (error) { printk(KERN_ERR "PM: Some system devices failed to power down, " "aborting hibernation\n"); @@ -302,7 +297,6 @@ static int create_image(int platform_mode) Power_up: syscore_resume(); - sysdev_resume(); /* NOTE: dpm_resume_noirq() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ @@ -333,20 +327,25 @@ static int create_image(int platform_mode) int hibernation_snapshot(int platform_mode) { + pm_message_t msg = PMSG_RECOVER; int error; error = platform_begin(platform_mode); if (error) goto Close; + error = dpm_prepare(PMSG_FREEZE); + if (error) + goto Complete_devices; + /* Preallocate image memory before shutting down devices. */ error = hibernate_preallocate_memory(); if (error) - goto Close; + goto Complete_devices; suspend_console(); pm_restrict_gfp_mask(); - error = dpm_suspend_start(PMSG_FREEZE); + error = dpm_suspend(PMSG_FREEZE); if (error) goto Recover_platform; @@ -364,13 +363,17 @@ int hibernation_snapshot(int platform_mode) if (error || !in_suspend) swsusp_free(); - dpm_resume_end(in_suspend ? - (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + msg = in_suspend ? (error ? 
PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE; + dpm_resume(msg); if (error || !in_suspend) pm_restore_gfp_mask(); resume_console(); + + Complete_devices: + dpm_complete(msg); + Close: platform_end(platform_mode); return error; @@ -409,12 +412,7 @@ static int resume_target_kernel(bool platform_mode) local_irq_disable(); - error = sysdev_suspend(PMSG_QUIESCE); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (error) goto Enable_irqs; @@ -442,7 +440,6 @@ static int resume_target_kernel(bool platform_mode) touch_softlockup_watchdog(); syscore_resume(); - sysdev_resume(); Enable_irqs: local_irq_enable(); @@ -528,7 +525,6 @@ int hibernation_platform_enter(void) goto Platform_finish; local_irq_disable(); - sysdev_suspend(PMSG_HIBERNATE); syscore_suspend(); if (pm_wakeup_pending()) { error = -EAGAIN; @@ -541,7 +537,6 @@ int hibernation_platform_enter(void) Power_up: syscore_resume(); - sysdev_resume(); local_irq_enable(); enable_nonboot_cpus(); @@ -982,10 +977,33 @@ static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *att power_attr(image_size); +static ssize_t reserved_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", reserved_size); +} + +static ssize_t reserved_size_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long size; + + if (sscanf(buf, "%lu", &size) == 1) { + reserved_size = size; + return n; + } + + return -EINVAL; +} + +power_attr(reserved_size); + static struct attribute * g[] = { &disk_attr.attr, &resume_attr.attr, &image_size_attr.attr, + &reserved_size_attr.attr, NULL, }; diff --git a/kernel/power/main.c b/kernel/power/main.c index de9aef8742f..2981af4ce7c 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -337,6 +337,7 @@ static int __init pm_init(void) if (error) return error; hibernate_image_size_init(); + hibernate_reserved_size_init(); power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; diff --git a/kernel/power/power.h b/kernel/power/power.h index 03634be55f6..9a00a0a2628 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -15,6 +15,7 @@ struct swsusp_info { #ifdef CONFIG_HIBERNATION /* kernel/power/snapshot.c */ +extern void __init hibernate_reserved_size_init(void); extern void __init hibernate_image_size_init(void); #ifdef CONFIG_ARCH_HIBERNATION_HEADER @@ -55,6 +56,7 @@ extern int hibernation_platform_enter(void); #else /* !CONFIG_HIBERNATION */ +static inline void hibernate_reserved_size_init(void) {} static inline void hibernate_image_size_init(void) {} #endif /* !CONFIG_HIBERNATION */ @@ -72,6 +74,8 @@ static struct kobj_attribute _name##_attr = { \ /* Preferred image size in bytes (default 500 MB) */ extern unsigned long image_size; +/* Size of memory reserved for drivers (default SPARE_PAGES x PAGE_SIZE) */ +extern unsigned long reserved_size; extern int in_suspend; extern dev_t swsusp_resume_device; extern sector_t swsusp_resume_block; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index ca0aacc2487..ace55889f70 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -41,16 +41,28 @@ static void swsusp_set_page_forbidden(struct page *); static void swsusp_unset_page_forbidden(struct page *); /* + * Number of bytes to reserve for memory allocations made by device drivers + * from their ->freeze() and ->freeze_noirq() callbacks so that they don't + * cause image creation to fail (tunable 
via /sys/power/reserved_size). + */ +unsigned long reserved_size; + +void __init hibernate_reserved_size_init(void) +{ + reserved_size = SPARE_PAGES * PAGE_SIZE; +} + +/* * Preferred image size in bytes (tunable via /sys/power/image_size). - * When it is set to N, the image creating code will do its best to - * ensure the image size will not exceed N bytes, but if that is - * impossible, it will try to create the smallest image possible. + * When it is set to N, swsusp will do its best to ensure the image + * size will not exceed N bytes, but if that is impossible, it will + * try to create the smallest image possible. */ unsigned long image_size; void __init hibernate_image_size_init(void) { - image_size = (totalram_pages / 3) * PAGE_SIZE; + image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; } /* List of PBEs needed for restoring the pages that were allocated before @@ -1263,11 +1275,13 @@ static unsigned long minimum_image_size(unsigned long saveable) * frame in use. We also need a number of page frames to be free during * hibernation for allocations made while saving the image and for device * drivers, in case they need to allocate memory from their hibernation - * callbacks (these two numbers are given by PAGES_FOR_IO and SPARE_PAGES, - * respectively, both of which are rough estimates). To make this happen, we - * compute the total number of available page frames and allocate at least + * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough + * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through + * /sys/power/reserved_size, respectively). To make this happen, we compute the + * total number of available page frames and allocate at least * - * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + 2 * SPARE_PAGES + * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) * * of them, which corresponds to the maximum size of a hibernation image. * @@ -1322,7 +1336,8 @@ int hibernate_preallocate_memory(void) count -= totalreserve_pages; /* Compute the maximum number of saveable pages to leave in memory. */ - max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES; + max_size = (count - (size + PAGES_FOR_IO)) / 2 + - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); /* Compute the desired number of image pages specified by image_size. 
*/ size = DIV_ROUND_UP(image_size, PAGE_SIZE); if (size > max_size) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 8935369d503..1c41ba21541 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -163,19 +163,13 @@ static int suspend_enter(suspend_state_t state) arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); - error = sysdev_suspend(PMSG_SUSPEND); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (!error) { if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { error = suspend_ops->enter(state); events_check_enabled = false; } syscore_resume(); - sysdev_resume(); } arch_suspend_enable_irqs(); @@ -216,7 +210,6 @@ int suspend_devices_and_enter(suspend_state_t state) goto Close; } suspend_console(); - pm_restrict_gfp_mask(); suspend_test_start(); error = dpm_suspend_start(PMSG_SUSPEND); if (error) { @@ -227,13 +220,12 @@ int suspend_devices_and_enter(suspend_state_t state) if (suspend_test(TEST_DEVICES)) goto Recover_platform; - suspend_enter(state); + error = suspend_enter(state); Resume_devices: suspend_test_start(); dpm_resume_end(PMSG_RESUME); suspend_test_finish("resume devices"); - pm_restore_gfp_mask(); resume_console(); Close: if (suspend_ops->end) @@ -294,7 +286,9 @@ int enter_state(suspend_state_t state) goto Finish; pr_debug("PM: Entering %s sleep\n", pm_states[state]); + pm_restrict_gfp_mask(); error = suspend_devices_and_enter(state); + pm_restore_gfp_mask(); Finish: pr_debug("PM: Finishing wakeup.\n"); diff --git a/kernel/power/user.c b/kernel/power/user.c index c36c3b9e8a8..7d02d33be69 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -135,8 +135,10 @@ static int snapshot_release(struct inode *inode, struct file *filp) free_basic_memory_bitmaps(); data = filp->private_data; free_all_swap_pages(data->swap); - if (data->frozen) + if (data->frozen) { + pm_restore_gfp_mask(); thaw_processes(); + } pm_notifier_call_chain(data->mode == O_RDONLY ? 
PM_POST_HIBERNATION : PM_POST_RESTORE); atomic_inc(&snapshot_device_available); @@ -379,6 +381,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, * PM_HIBERNATION_PREPARE */ error = suspend_devices_and_enter(PM_SUSPEND_MEM); + data->ready = 0; break; case SNAPSHOT_PLATFORM_SUPPORT: diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 0943ed7a403..64b2a37c07d 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -183,6 +183,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); } +typedef struct task_group *rt_rq_iter_t; + +#define for_each_rt_rq(rt_rq, iter, rq) \ + for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \ + (&iter->list != &task_groups) && \ + (rt_rq = iter->rt_rq[cpu_of(rq)]); \ + iter = list_entry_rcu(iter->list.next, typeof(*iter), list)) + static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) { list_add_rcu(&rt_rq->leaf_rt_rq_list, @@ -288,6 +296,11 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) return ktime_to_ns(def_rt_bandwidth.rt_period); } +typedef struct rt_rq *rt_rq_iter_t; + +#define for_each_rt_rq(rt_rq, iter, rq) \ + for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL) + static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) { } @@ -402,12 +415,13 @@ next: static void __disable_runtime(struct rq *rq) { struct root_domain *rd = rq->rd; + rt_rq_iter_t iter; struct rt_rq *rt_rq; if (unlikely(!scheduler_running)) return; - for_each_leaf_rt_rq(rt_rq, rq) { + for_each_rt_rq(rt_rq, iter, rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); s64 want; int i; @@ -487,6 +501,7 @@ static void disable_runtime(struct rq *rq) static void __enable_runtime(struct rq *rq) { + rt_rq_iter_t iter; struct rt_rq *rt_rq; if (unlikely(!scheduler_running)) @@ -495,7 +510,7 @@ static void __enable_runtime(struct rq *rq) /* * Reset each runqueue's bandwidth settings */ - for_each_leaf_rt_rq(rt_rq, rq) { + for_each_rt_rq(rt_rq, iter, rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); raw_spin_lock(&rt_b->rt_runtime_lock); @@ -1817,10 +1832,11 @@ extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); static void print_rt_stats(struct seq_file *m, int cpu) { + rt_rq_iter_t iter; struct rt_rq *rt_rq; rcu_read_lock(); - for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu)) + for_each_rt_rq(rt_rq, iter, cpu_rq(cpu)) print_rt_rq(m, cpu, rt_rq); rcu_read_unlock(); } diff --git a/kernel/sys.c b/kernel/sys.c index af468edf096..f0c10385f30 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -315,7 +315,6 @@ void kernel_restart_prepare(char *cmd) blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; device_shutdown(); - sysdev_shutdown(); syscore_shutdown(); } @@ -354,7 +353,6 @@ static void kernel_shutdown_prepare(enum system_states state) void kernel_halt(void) { kernel_shutdown_prepare(SYSTEM_HALT); - sysdev_shutdown(); syscore_shutdown(); printk(KERN_EMERG "System halted.\n"); kmsg_dump(KMSG_DUMP_HALT); @@ -374,7 +372,6 @@ void kernel_power_off(void) if (pm_power_off_prepare) pm_power_off_prepare(); disable_nonboot_cpus(); - sysdev_shutdown(); syscore_shutdown(); printk(KERN_EMERG "Power down.\n"); kmsg_dump(KMSG_DUMP_POWEROFF); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 6519cf62d9c..0e17c10f8a9 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -685,8 +685,8 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) /* Add 
clocksource to the clcoksource list */ mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); - clocksource_select(); clocksource_enqueue_watchdog(cs); + clocksource_select(); mutex_unlock(&clocksource_mutex); return 0; } @@ -706,8 +706,8 @@ int clocksource_register(struct clocksource *cs) mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); - clocksource_select(); clocksource_enqueue_watchdog(cs); + clocksource_select(); mutex_unlock(&clocksource_mutex); return 0; } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index da800ffa810..723c7637e55 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -522,10 +522,11 @@ static void tick_broadcast_init_next_event(struct cpumask *mask, */ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { + int cpu = smp_processor_id(); + /* Set it up only once ! */ if (bc->event_handler != tick_handle_oneshot_broadcast) { int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; - int cpu = smp_processor_id(); bc->event_handler = tick_handle_oneshot_broadcast; clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); @@ -551,6 +552,15 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) tick_broadcast_set_event(tick_next_period, 1); } else bc->next_event.tv64 = KTIME_MAX; + } else { + /* + * The first cpu which switches to oneshot mode sets + * the bit for all other cpus which are in the general + * (periodic) broadcast mask. So the bit is set and + * would prevent the first broadcast enter after this + * to program the bc device. + */ + tick_broadcast_clear_oneshot(cpu); } } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ee24fa1935a..d017c2c82c4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -39,20 +39,26 @@ #include "trace_stat.h" #define FTRACE_WARN_ON(cond) \ - do { \ - if (WARN_ON(cond)) \ + ({ \ + int ___r = cond; \ + if (WARN_ON(___r)) \ ftrace_kill(); \ - } while (0) + ___r; \ + }) #define FTRACE_WARN_ON_ONCE(cond) \ - do { \ - if (WARN_ON_ONCE(cond)) \ + ({ \ + int ___r = cond; \ + if (WARN_ON_ONCE(___r)) \ ftrace_kill(); \ - } while (0) + ___r; \ + }) /* hash bits for specific function selection */ #define FTRACE_HASH_BITS 7 #define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) +#define FTRACE_HASH_DEFAULT_BITS 10 +#define FTRACE_HASH_MAX_BITS 12 /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; @@ -81,23 +87,29 @@ static struct ftrace_ops ftrace_list_end __read_mostly = .func = ftrace_stub, }; -static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; +static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; +static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; +static struct ftrace_ops global_ops; + +static void +ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); /* - * Traverse the ftrace_list, invoking all entries. The reason that we + * Traverse the ftrace_global_list, invoking all entries. The reason that we * can use rcu_dereference_raw() is that elements removed from this list * are simply leaked, so there is no need to interact with a grace-period * mechanism. The rcu_dereference_raw() calls are needed to handle - * concurrent insertions into the ftrace_list. + * concurrent insertions into the ftrace_global_list. 
* * Silly Alpha and silly pointer-speculation compiler optimizations! */ -static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) +static void ftrace_global_list_func(unsigned long ip, + unsigned long parent_ip) { - struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/ + struct ftrace_ops *op = rcu_dereference_raw(ftrace_global_list); /*see above*/ while (op != &ftrace_list_end) { op->func(ip, parent_ip); @@ -147,46 +159,69 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip) } #endif -static int __register_ftrace_function(struct ftrace_ops *ops) +static void update_global_ops(void) { - ops->next = ftrace_list; + ftrace_func_t func; + /* - * We are entering ops into the ftrace_list but another - * CPU might be walking that list. We need to make sure - * the ops->next pointer is valid before another CPU sees - * the ops pointer included into the ftrace_list. + * If there's only one function registered, then call that + * function directly. Otherwise, we need to iterate over the + * registered callers. */ - rcu_assign_pointer(ftrace_list, ops); + if (ftrace_global_list == &ftrace_list_end || + ftrace_global_list->next == &ftrace_list_end) + func = ftrace_global_list->func; + else + func = ftrace_global_list_func; - if (ftrace_enabled) { - ftrace_func_t func; + /* If we filter on pids, update to use the pid function */ + if (!list_empty(&ftrace_pids)) { + set_ftrace_pid_function(func); + func = ftrace_pid_func; + } - if (ops->next == &ftrace_list_end) - func = ops->func; - else - func = ftrace_list_func; + global_ops.func = func; +} - if (!list_empty(&ftrace_pids)) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } +static void update_ftrace_function(void) +{ + ftrace_func_t func; + + update_global_ops(); + + /* + * If we are at the end of the list and this ops is + * not dynamic, then have the mcount trampoline call + * the function directly + */ + if (ftrace_ops_list == &ftrace_list_end || + (ftrace_ops_list->next == &ftrace_list_end && + !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC))) + func = ftrace_ops_list->func; + else + func = ftrace_ops_list_func; - /* - * For one func, simply call it directly. - * For more than one func, call the chain. - */ #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - ftrace_trace_function = func; + ftrace_trace_function = func; #else - __ftrace_trace_function = func; - ftrace_trace_function = ftrace_test_stop_func; + __ftrace_trace_function = func; + ftrace_trace_function = ftrace_test_stop_func; #endif - } +} - return 0; +static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) +{ + ops->next = *list; + /* + * We are entering ops into the list but another + * CPU might be walking that list. We need to make sure + * the ops->next pointer is valid before another CPU sees + * the ops pointer included into the list. + */ + rcu_assign_pointer(*list, ops); } -static int __unregister_ftrace_function(struct ftrace_ops *ops) +static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) { struct ftrace_ops **p; @@ -194,13 +229,12 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) * If we are removing the last function, then simply point * to the ftrace_stub. 
*/ - if (ftrace_list == ops && ops->next == &ftrace_list_end) { - ftrace_trace_function = ftrace_stub; - ftrace_list = &ftrace_list_end; + if (*list == ops && ops->next == &ftrace_list_end) { + *list = &ftrace_list_end; return 0; } - for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) + for (p = list; *p != &ftrace_list_end; p = &(*p)->next) if (*p == ops) break; @@ -208,53 +242,83 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) return -1; *p = (*p)->next; + return 0; +} - if (ftrace_enabled) { - /* If we only have one func left, then call that directly */ - if (ftrace_list->next == &ftrace_list_end) { - ftrace_func_t func = ftrace_list->func; +static int __register_ftrace_function(struct ftrace_ops *ops) +{ + if (ftrace_disabled) + return -ENODEV; - if (!list_empty(&ftrace_pids)) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - ftrace_trace_function = func; -#else - __ftrace_trace_function = func; -#endif - } - } + if (FTRACE_WARN_ON(ops == &global_ops)) + return -EINVAL; + + if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) + return -EBUSY; + + if (!core_kernel_data((unsigned long)ops)) + ops->flags |= FTRACE_OPS_FL_DYNAMIC; + + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { + int first = ftrace_global_list == &ftrace_list_end; + add_ftrace_ops(&ftrace_global_list, ops); + ops->flags |= FTRACE_OPS_FL_ENABLED; + if (first) + add_ftrace_ops(&ftrace_ops_list, &global_ops); + } else + add_ftrace_ops(&ftrace_ops_list, ops); + + if (ftrace_enabled) + update_ftrace_function(); return 0; } -static void ftrace_update_pid_func(void) +static int __unregister_ftrace_function(struct ftrace_ops *ops) { - ftrace_func_t func; + int ret; - if (ftrace_trace_function == ftrace_stub) - return; + if (ftrace_disabled) + return -ENODEV; -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - func = ftrace_trace_function; -#else - func = __ftrace_trace_function; -#endif + if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) + return -EBUSY; - if (!list_empty(&ftrace_pids)) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } else { - if (func == ftrace_pid_func) - func = ftrace_pid_function; - } + if (FTRACE_WARN_ON(ops == &global_ops)) + return -EINVAL; -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - ftrace_trace_function = func; -#else - __ftrace_trace_function = func; -#endif + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { + ret = remove_ftrace_ops(&ftrace_global_list, ops); + if (!ret && ftrace_global_list == &ftrace_list_end) + ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops); + if (!ret) + ops->flags &= ~FTRACE_OPS_FL_ENABLED; + } else + ret = remove_ftrace_ops(&ftrace_ops_list, ops); + + if (ret < 0) + return ret; + + if (ftrace_enabled) + update_ftrace_function(); + + /* + * Dynamic ops may be freed, we must make sure that all + * callers are done before leaving this function. 
+ */ + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) + synchronize_sched(); + + return 0; +} + +static void ftrace_update_pid_func(void) +{ + /* Only do something if we are tracing something */ + if (ftrace_trace_function == ftrace_stub) + return; + + update_ftrace_function(); } #ifdef CONFIG_FUNCTION_PROFILER @@ -888,8 +952,35 @@ enum { FTRACE_START_FUNC_RET = (1 << 3), FTRACE_STOP_FUNC_RET = (1 << 4), }; +struct ftrace_func_entry { + struct hlist_node hlist; + unsigned long ip; +}; -static int ftrace_filtered; +struct ftrace_hash { + unsigned long size_bits; + struct hlist_head *buckets; + unsigned long count; + struct rcu_head rcu; +}; + +/* + * We make these constant because no one should touch them, + * but they are used as the default "empty hash", to avoid allocating + * it all the time. These are in a read only section such that if + * anyone does try to modify it, it will cause an exception. + */ +static const struct hlist_head empty_buckets[1]; +static const struct ftrace_hash empty_hash = { + .buckets = (struct hlist_head *)empty_buckets, +}; +#define EMPTY_HASH ((struct ftrace_hash *)&empty_hash) + +static struct ftrace_ops global_ops = { + .func = ftrace_stub, + .notrace_hash = EMPTY_HASH, + .filter_hash = EMPTY_HASH, +}; static struct dyn_ftrace *ftrace_new_addrs; @@ -912,6 +1003,269 @@ static struct ftrace_page *ftrace_pages; static struct dyn_ftrace *ftrace_free_records; +static struct ftrace_func_entry * +ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) +{ + unsigned long key; + struct ftrace_func_entry *entry; + struct hlist_head *hhd; + struct hlist_node *n; + + if (!hash->count) + return NULL; + + if (hash->size_bits > 0) + key = hash_long(ip, hash->size_bits); + else + key = 0; + + hhd = &hash->buckets[key]; + + hlist_for_each_entry_rcu(entry, n, hhd, hlist) { + if (entry->ip == ip) + return entry; + } + return NULL; +} + +static void __add_hash_entry(struct ftrace_hash *hash, + struct ftrace_func_entry *entry) +{ + struct hlist_head *hhd; + unsigned long key; + + if (hash->size_bits) + key = hash_long(entry->ip, hash->size_bits); + else + key = 0; + + hhd = &hash->buckets[key]; + hlist_add_head(&entry->hlist, hhd); + hash->count++; +} + +static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip) +{ + struct ftrace_func_entry *entry; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->ip = ip; + __add_hash_entry(hash, entry); + + return 0; +} + +static void +free_hash_entry(struct ftrace_hash *hash, + struct ftrace_func_entry *entry) +{ + hlist_del(&entry->hlist); + kfree(entry); + hash->count--; +} + +static void +remove_hash_entry(struct ftrace_hash *hash, + struct ftrace_func_entry *entry) +{ + hlist_del(&entry->hlist); + hash->count--; +} + +static void ftrace_hash_clear(struct ftrace_hash *hash) +{ + struct hlist_head *hhd; + struct hlist_node *tp, *tn; + struct ftrace_func_entry *entry; + int size = 1 << hash->size_bits; + int i; + + if (!hash->count) + return; + + for (i = 0; i < size; i++) { + hhd = &hash->buckets[i]; + hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) + free_hash_entry(hash, entry); + } + FTRACE_WARN_ON(hash->count); +} + +static void free_ftrace_hash(struct ftrace_hash *hash) +{ + if (!hash || hash == EMPTY_HASH) + return; + ftrace_hash_clear(hash); + kfree(hash->buckets); + kfree(hash); +} + +static void __free_ftrace_hash_rcu(struct rcu_head *rcu) +{ + struct ftrace_hash *hash; + + hash = container_of(rcu, struct ftrace_hash, rcu); + free_ftrace_hash(hash); +} + +static void 
free_ftrace_hash_rcu(struct ftrace_hash *hash) +{ + if (!hash || hash == EMPTY_HASH) + return; + call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); +} + +static struct ftrace_hash *alloc_ftrace_hash(int size_bits) +{ + struct ftrace_hash *hash; + int size; + + hash = kzalloc(sizeof(*hash), GFP_KERNEL); + if (!hash) + return NULL; + + size = 1 << size_bits; + hash->buckets = kzalloc(sizeof(*hash->buckets) * size, GFP_KERNEL); + + if (!hash->buckets) { + kfree(hash); + return NULL; + } + + hash->size_bits = size_bits; + + return hash; +} + +static struct ftrace_hash * +alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) +{ + struct ftrace_func_entry *entry; + struct ftrace_hash *new_hash; + struct hlist_node *tp; + int size; + int ret; + int i; + + new_hash = alloc_ftrace_hash(size_bits); + if (!new_hash) + return NULL; + + /* Empty hash? */ + if (!hash || !hash->count) + return new_hash; + + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, tp, &hash->buckets[i], hlist) { + ret = add_hash_entry(new_hash, entry->ip); + if (ret < 0) + goto free_hash; + } + } + + FTRACE_WARN_ON(new_hash->count != hash->count); + + return new_hash; + + free_hash: + free_ftrace_hash(new_hash); + return NULL; +} + +static int +ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) +{ + struct ftrace_func_entry *entry; + struct hlist_node *tp, *tn; + struct hlist_head *hhd; + struct ftrace_hash *old_hash; + struct ftrace_hash *new_hash; + unsigned long key; + int size = src->count; + int bits = 0; + int i; + + /* + * If the new source is empty, just free dst and assign it + * the empty_hash. + */ + if (!src->count) { + free_ftrace_hash_rcu(*dst); + rcu_assign_pointer(*dst, EMPTY_HASH); + return 0; + } + + /* + * Make the hash size about 1/2 the # found + */ + for (size /= 2; size; size >>= 1) + bits++; + + /* Don't allocate too much */ + if (bits > FTRACE_HASH_MAX_BITS) + bits = FTRACE_HASH_MAX_BITS; + + new_hash = alloc_ftrace_hash(bits); + if (!new_hash) + return -ENOMEM; + + size = 1 << src->size_bits; + for (i = 0; i < size; i++) { + hhd = &src->buckets[i]; + hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) { + if (bits > 0) + key = hash_long(entry->ip, bits); + else + key = 0; + remove_hash_entry(src, entry); + __add_hash_entry(new_hash, entry); + } + } + + old_hash = *dst; + rcu_assign_pointer(*dst, new_hash); + free_ftrace_hash_rcu(old_hash); + + return 0; +} + +/* + * Test the hashes for this ops to see if we want to call + * the ops->func or not. + * + * It's a match if the ip is in the ops->filter_hash or + * the filter_hash does not exist or is empty, + * AND + * the ip is not in the ops->notrace_hash. + * + * This needs to be called with preemption disabled as + * the hashes are freed with call_rcu_sched(). + */ +static int +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) +{ + struct ftrace_hash *filter_hash; + struct ftrace_hash *notrace_hash; + int ret; + + filter_hash = rcu_dereference_raw(ops->filter_hash); + notrace_hash = rcu_dereference_raw(ops->notrace_hash); + + if ((!filter_hash || !filter_hash->count || + ftrace_lookup_ip(filter_hash, ip)) && + (!notrace_hash || !notrace_hash->count || + !ftrace_lookup_ip(notrace_hash, ip))) + ret = 1; + else + ret = 0; + + return ret; +} + /* * This is a double for. Do not use 'break' to break out of the loop, * you must use a goto. 
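A quick aside on the new per-ops matching rule: the comment on ftrace_ops_test() above defines it precisely, and the predicate is simple enough to model in isolation. The following is a small, stand-alone user-space sketch (not kernel code) that mirrors that rule with plain arrays standing in for struct ftrace_hash; all names here are illustrative.

/*
 * Sketch of the ftrace_ops_test() rule above: an ip is traced by an ops
 * when it is in the filter set (or the filter set is empty/absent) AND it
 * is not in the notrace set.  Arrays replace struct ftrace_hash purely
 * for illustration.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct ip_set {
	const unsigned long *ips;
	size_t count;			/* 0 models an empty (or missing) hash */
};

static bool set_has(const struct ip_set *set, unsigned long ip)
{
	for (size_t i = 0; i < set->count; i++)
		if (set->ips[i] == ip)
			return true;
	return false;
}

static bool ops_would_trace(const struct ip_set *filter,
			    const struct ip_set *notrace, unsigned long ip)
{
	return (filter->count == 0 || set_has(filter, ip)) &&
	       !set_has(notrace, ip);
}

int main(void)
{
	unsigned long filter_ips[] = { 0x1000, 0x2000 };
	unsigned long notrace_ips[] = { 0x2000 };
	struct ip_set filter = { filter_ips, 2 };
	struct ip_set notrace = { notrace_ips, 1 };

	printf("%d %d %d\n",
	       ops_would_trace(&filter, &notrace, 0x1000),	/* 1: filtered in */
	       ops_would_trace(&filter, &notrace, 0x2000),	/* 0: notrace wins */
	       ops_would_trace(&filter, &notrace, 0x3000));	/* 0: not in filter */
	return 0;
}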
@@ -926,6 +1280,105 @@ static struct dyn_ftrace *ftrace_free_records; } \ } +static void __ftrace_hash_rec_update(struct ftrace_ops *ops, + int filter_hash, + bool inc) +{ + struct ftrace_hash *hash; + struct ftrace_hash *other_hash; + struct ftrace_page *pg; + struct dyn_ftrace *rec; + int count = 0; + int all = 0; + + /* Only update if the ops has been registered */ + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return; + + /* + * In the filter_hash case: + * If the count is zero, we update all records. + * Otherwise we just update the items in the hash. + * + * In the notrace_hash case: + * We enable the update in the hash. + * As disabling notrace means enabling the tracing, + * and enabling notrace means disabling, the inc variable + * gets inversed. + */ + if (filter_hash) { + hash = ops->filter_hash; + other_hash = ops->notrace_hash; + if (!hash || !hash->count) + all = 1; + } else { + inc = !inc; + hash = ops->notrace_hash; + other_hash = ops->filter_hash; + /* + * If the notrace hash has no items, + * then there's nothing to do. + */ + if (hash && !hash->count) + return; + } + + do_for_each_ftrace_rec(pg, rec) { + int in_other_hash = 0; + int in_hash = 0; + int match = 0; + + if (all) { + /* + * Only the filter_hash affects all records. + * Update if the record is not in the notrace hash. + */ + if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) + match = 1; + } else { + in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip); + in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip); + + /* + * + */ + if (filter_hash && in_hash && !in_other_hash) + match = 1; + else if (!filter_hash && in_hash && + (in_other_hash || !other_hash->count)) + match = 1; + } + if (!match) + continue; + + if (inc) { + rec->flags++; + if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX)) + return; + } else { + if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0)) + return; + rec->flags--; + } + count++; + /* Shortcut, if we handled all records, we are done. */ + if (!all && count == hash->count) + return; + } while_for_each_ftrace_rec(); +} + +static void ftrace_hash_rec_disable(struct ftrace_ops *ops, + int filter_hash) +{ + __ftrace_hash_rec_update(ops, filter_hash, 0); +} + +static void ftrace_hash_rec_enable(struct ftrace_ops *ops, + int filter_hash) +{ + __ftrace_hash_rec_update(ops, filter_hash, 1); +} + static void ftrace_free_rec(struct dyn_ftrace *rec) { rec->freelist = ftrace_free_records; @@ -1047,18 +1500,18 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) ftrace_addr = (unsigned long)FTRACE_ADDR; /* - * If this record is not to be traced or we want to disable it, - * then disable it. + * If we are enabling tracing: + * + * If the record has a ref count, then we need to enable it + * because someone is using it. * - * If we want to enable it and filtering is off, then enable it. + * Otherwise we make sure its disabled. * - * If we want to enable it and filtering is on, enable it only if - * it's filtered + * If we are disabling tracing, then disable all records that + * are enabled. 
*/ - if (enable && !(rec->flags & FTRACE_FL_NOTRACE)) { - if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER)) - flag = FTRACE_FL_ENABLED; - } + if (enable && (rec->flags & ~FTRACE_FL_MASK)) + flag = FTRACE_FL_ENABLED; /* If the state of this record hasn't changed, then do nothing */ if ((rec->flags & FTRACE_FL_ENABLED) == flag) @@ -1079,19 +1532,16 @@ static void ftrace_replace_code(int enable) struct ftrace_page *pg; int failed; + if (unlikely(ftrace_disabled)) + return; + do_for_each_ftrace_rec(pg, rec) { - /* - * Skip over free records, records that have - * failed and not converted. - */ - if (rec->flags & FTRACE_FL_FREE || - rec->flags & FTRACE_FL_FAILED || - !(rec->flags & FTRACE_FL_CONVERTED)) + /* Skip over free records */ + if (rec->flags & FTRACE_FL_FREE) continue; failed = __ftrace_replace_code(rec, enable); if (failed) { - rec->flags |= FTRACE_FL_FAILED; ftrace_bug(failed, rec->ip); /* Stop processing */ return; @@ -1107,10 +1557,12 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) ip = rec->ip; + if (unlikely(ftrace_disabled)) + return 0; + ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); if (ret) { ftrace_bug(ret, ip); - rec->flags |= FTRACE_FL_FAILED; return 0; } return 1; @@ -1171,6 +1623,7 @@ static void ftrace_run_update_code(int command) static ftrace_func_t saved_ftrace_func; static int ftrace_start_up; +static int global_start_up; static void ftrace_startup_enable(int command) { @@ -1185,19 +1638,36 @@ static void ftrace_startup_enable(int command) ftrace_run_update_code(command); } -static void ftrace_startup(int command) +static void ftrace_startup(struct ftrace_ops *ops, int command) { + bool hash_enable = true; + if (unlikely(ftrace_disabled)) return; ftrace_start_up++; command |= FTRACE_ENABLE_CALLS; + /* ops marked global share the filter hashes */ + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { + ops = &global_ops; + /* Don't update hash if global is already set */ + if (global_start_up) + hash_enable = false; + global_start_up++; + } + + ops->flags |= FTRACE_OPS_FL_ENABLED; + if (hash_enable) + ftrace_hash_rec_enable(ops, 1); + ftrace_startup_enable(command); } -static void ftrace_shutdown(int command) +static void ftrace_shutdown(struct ftrace_ops *ops, int command) { + bool hash_disable = true; + if (unlikely(ftrace_disabled)) return; @@ -1209,6 +1679,23 @@ static void ftrace_shutdown(int command) */ WARN_ON_ONCE(ftrace_start_up < 0); + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { + ops = &global_ops; + global_start_up--; + WARN_ON_ONCE(global_start_up < 0); + /* Don't update hash if global still has users */ + if (global_start_up) { + WARN_ON_ONCE(!ftrace_start_up); + hash_disable = false; + } + } + + if (hash_disable) + ftrace_hash_rec_disable(ops, 1); + + if (ops != &global_ops || !global_start_up) + ops->flags &= ~FTRACE_OPS_FL_ENABLED; + if (!ftrace_start_up) command |= FTRACE_DISABLE_CALLS; @@ -1273,10 +1760,10 @@ static int ftrace_update_code(struct module *mod) */ if (!ftrace_code_disable(mod, p)) { ftrace_free_rec(p); - continue; + /* Game over */ + break; } - p->flags |= FTRACE_FL_CONVERTED; ftrace_update_cnt++; /* @@ -1351,9 +1838,9 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) enum { FTRACE_ITER_FILTER = (1 << 0), FTRACE_ITER_NOTRACE = (1 << 1), - FTRACE_ITER_FAILURES = (1 << 2), - FTRACE_ITER_PRINTALL = (1 << 3), - FTRACE_ITER_HASH = (1 << 4), + FTRACE_ITER_PRINTALL = (1 << 2), + FTRACE_ITER_HASH = (1 << 3), + FTRACE_ITER_ENABLED = (1 << 4), }; #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 
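Worth spelling out before the iterator changes below: the hunks above treat the low bits of rec->flags as a reference count (rec->flags++ / rec->flags--, always read through & ~FTRACE_FL_MASK), so __ftrace_replace_code() can simply ask "does anyone reference this record?" instead of consulting the old FILTER/NOTRACE bits, and the new enabled_functions file prints that same count. A user-space sketch of the convention follows; the bit positions and DEMO_* names are assumptions for illustration only, not the kernel's actual FTRACE_FL_* values.

/*
 * Flags word doubling as a refcount: two high bits carry state, the low
 * bits count how many ftrace_ops want the record.  Illustrative only.
 */
#include <stdio.h>

#define DEMO_FL_FREE	(1UL << 31)
#define DEMO_FL_ENABLED	(1UL << 30)
#define DEMO_FL_MASK	(DEMO_FL_FREE | DEMO_FL_ENABLED)
#define DEMO_REF_MAX	((1UL << 30) - 1)	/* largest count the low bits hold */

struct demo_rec {
	unsigned long flags;
};

static unsigned long demo_ref_count(const struct demo_rec *rec)
{
	return rec->flags & ~DEMO_FL_MASK;
}

static void demo_ref_update(struct demo_rec *rec, int inc)
{
	if (inc) {
		if (demo_ref_count(rec) == DEMO_REF_MAX)
			return;			/* would spill into the flag bits */
		rec->flags++;
	} else {
		if (demo_ref_count(rec) == 0)
			return;			/* underflow */
		rec->flags--;
	}
}

int main(void)
{
	struct demo_rec rec = { .flags = 0 };

	demo_ref_update(&rec, 1);
	demo_ref_update(&rec, 1);
	rec.flags |= DEMO_FL_ENABLED;
	/* what enabled_functions would show for this record: the count alone */
	printf("refcount = %lu\n", demo_ref_count(&rec));	/* 2 */
	return 0;
}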
@@ -1365,6 +1852,8 @@ struct ftrace_iterator { struct dyn_ftrace *func; struct ftrace_func_probe *probe; struct trace_parser parser; + struct ftrace_hash *hash; + struct ftrace_ops *ops; int hidx; int idx; unsigned flags; @@ -1461,8 +1950,12 @@ static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_iterator *iter = m->private; + struct ftrace_ops *ops = &global_ops; struct dyn_ftrace *rec = NULL; + if (unlikely(ftrace_disabled)) + return NULL; + if (iter->flags & FTRACE_ITER_HASH) return t_hash_next(m, pos); @@ -1483,17 +1976,15 @@ t_next(struct seq_file *m, void *v, loff_t *pos) rec = &iter->pg->records[iter->idx++]; if ((rec->flags & FTRACE_FL_FREE) || - (!(iter->flags & FTRACE_ITER_FAILURES) && - (rec->flags & FTRACE_FL_FAILED)) || - - ((iter->flags & FTRACE_ITER_FAILURES) && - !(rec->flags & FTRACE_FL_FAILED)) || - ((iter->flags & FTRACE_ITER_FILTER) && - !(rec->flags & FTRACE_FL_FILTER)) || + !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) || ((iter->flags & FTRACE_ITER_NOTRACE) && - !(rec->flags & FTRACE_FL_NOTRACE))) { + !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) || + + ((iter->flags & FTRACE_ITER_ENABLED) && + !(rec->flags & ~FTRACE_FL_MASK))) { + rec = NULL; goto retry; } @@ -1517,10 +2008,15 @@ static void reset_iter_read(struct ftrace_iterator *iter) static void *t_start(struct seq_file *m, loff_t *pos) { struct ftrace_iterator *iter = m->private; + struct ftrace_ops *ops = &global_ops; void *p = NULL; loff_t l; mutex_lock(&ftrace_lock); + + if (unlikely(ftrace_disabled)) + return NULL; + /* * If an lseek was done, then reset and start from beginning. */ @@ -1532,7 +2028,7 @@ static void *t_start(struct seq_file *m, loff_t *pos) * off, we can short cut and just print out that all * functions are enabled. */ - if (iter->flags & FTRACE_ITER_FILTER && !ftrace_filtered) { + if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) { if (*pos > 0) return t_hash_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; @@ -1590,7 +2086,11 @@ static int t_show(struct seq_file *m, void *v) if (!rec) return 0; - seq_printf(m, "%ps\n", (void *)rec->ip); + seq_printf(m, "%ps", (void *)rec->ip); + if (iter->flags & FTRACE_ITER_ENABLED) + seq_printf(m, " (%ld)", + rec->flags & ~FTRACE_FL_MASK); + seq_printf(m, "\n"); return 0; } @@ -1630,44 +2130,46 @@ ftrace_avail_open(struct inode *inode, struct file *file) } static int -ftrace_failures_open(struct inode *inode, struct file *file) +ftrace_enabled_open(struct inode *inode, struct file *file) { - int ret; - struct seq_file *m; struct ftrace_iterator *iter; + int ret; + + if (unlikely(ftrace_disabled)) + return -ENODEV; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + iter->pg = ftrace_pages_start; + iter->flags = FTRACE_ITER_ENABLED; - ret = ftrace_avail_open(inode, file); + ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { - m = file->private_data; - iter = m->private; - iter->flags = FTRACE_ITER_FAILURES; + struct seq_file *m = file->private_data; + + m->private = iter; + } else { + kfree(iter); } return ret; } - -static void ftrace_filter_reset(int enable) +static void ftrace_filter_reset(struct ftrace_hash *hash) { - struct ftrace_page *pg; - struct dyn_ftrace *rec; - unsigned long type = enable ? 
FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; - mutex_lock(&ftrace_lock); - if (enable) - ftrace_filtered = 0; - do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & FTRACE_FL_FAILED) - continue; - rec->flags &= ~type; - } while_for_each_ftrace_rec(); + ftrace_hash_clear(hash); mutex_unlock(&ftrace_lock); } static int -ftrace_regex_open(struct inode *inode, struct file *file, int enable) +ftrace_regex_open(struct ftrace_ops *ops, int flag, + struct inode *inode, struct file *file) { struct ftrace_iterator *iter; + struct ftrace_hash *hash; int ret = 0; if (unlikely(ftrace_disabled)) @@ -1682,21 +2184,42 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) return -ENOMEM; } + if (flag & FTRACE_ITER_NOTRACE) + hash = ops->notrace_hash; + else + hash = ops->filter_hash; + + iter->ops = ops; + iter->flags = flag; + + if (file->f_mode & FMODE_WRITE) { + mutex_lock(&ftrace_lock); + iter->hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, hash); + mutex_unlock(&ftrace_lock); + + if (!iter->hash) { + trace_parser_put(&iter->parser); + kfree(iter); + return -ENOMEM; + } + } + mutex_lock(&ftrace_regex_lock); + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) - ftrace_filter_reset(enable); + ftrace_filter_reset(iter->hash); if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; - iter->flags = enable ? FTRACE_ITER_FILTER : - FTRACE_ITER_NOTRACE; ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { struct seq_file *m = file->private_data; m->private = iter; } else { + /* Failed */ + free_ftrace_hash(iter->hash); trace_parser_put(&iter->parser); kfree(iter); } @@ -1710,13 +2233,15 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) static int ftrace_filter_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(inode, file, 1); + return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER, + inode, file); } static int ftrace_notrace_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(inode, file, 0); + return ftrace_regex_open(&global_ops, FTRACE_ITER_NOTRACE, + inode, file); } static loff_t @@ -1761,86 +2286,99 @@ static int ftrace_match(char *str, char *regex, int len, int type) } static int -ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type) +enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int not) +{ + struct ftrace_func_entry *entry; + int ret = 0; + + entry = ftrace_lookup_ip(hash, rec->ip); + if (not) { + /* Do nothing if it doesn't exist */ + if (!entry) + return 0; + + free_hash_entry(hash, entry); + } else { + /* Do nothing if it exists */ + if (entry) + return 0; + + ret = add_hash_entry(hash, rec->ip); + } + return ret; +} + +static int +ftrace_match_record(struct dyn_ftrace *rec, char *mod, + char *regex, int len, int type) { char str[KSYM_SYMBOL_LEN]; + char *modname; + + kallsyms_lookup(rec->ip, NULL, NULL, &modname, str); + + if (mod) { + /* module lookup requires matching the module */ + if (!modname || strcmp(modname, mod)) + return 0; + + /* blank search means to match all funcs in the mod */ + if (!len) + return 1; + } - kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); return ftrace_match(str, regex, len, type); } -static int ftrace_match_records(char *buff, int len, int enable) +static int +match_records(struct ftrace_hash *hash, char *buff, + int len, char *mod, int not) { - unsigned int search_len; + unsigned search_len = 0; struct ftrace_page *pg; struct dyn_ftrace *rec; - unsigned long flag; - char *search; - int type; - int not; + int 
type = MATCH_FULL; + char *search = buff; int found = 0; + int ret; - flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; - type = filter_parse_regex(buff, len, &search, ¬); - - search_len = strlen(search); + if (len) { + type = filter_parse_regex(buff, len, &search, ¬); + search_len = strlen(search); + } mutex_lock(&ftrace_lock); - do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & FTRACE_FL_FAILED) - continue; + if (unlikely(ftrace_disabled)) + goto out_unlock; - if (ftrace_match_record(rec, search, search_len, type)) { - if (not) - rec->flags &= ~flag; - else - rec->flags |= flag; + do_for_each_ftrace_rec(pg, rec) { + + if (ftrace_match_record(rec, mod, search, search_len, type)) { + ret = enter_record(hash, rec, not); + if (ret < 0) { + found = ret; + goto out_unlock; + } found = 1; } - /* - * Only enable filtering if we have a function that - * is filtered on. - */ - if (enable && (rec->flags & FTRACE_FL_FILTER)) - ftrace_filtered = 1; } while_for_each_ftrace_rec(); + out_unlock: mutex_unlock(&ftrace_lock); return found; } static int -ftrace_match_module_record(struct dyn_ftrace *rec, char *mod, - char *regex, int len, int type) +ftrace_match_records(struct ftrace_hash *hash, char *buff, int len) { - char str[KSYM_SYMBOL_LEN]; - char *modname; - - kallsyms_lookup(rec->ip, NULL, NULL, &modname, str); - - if (!modname || strcmp(modname, mod)) - return 0; - - /* blank search means to match all funcs in the mod */ - if (len) - return ftrace_match(str, regex, len, type); - else - return 1; + return match_records(hash, buff, len, NULL, 0); } -static int ftrace_match_module_records(char *buff, char *mod, int enable) +static int +ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod) { - unsigned search_len = 0; - struct ftrace_page *pg; - struct dyn_ftrace *rec; - int type = MATCH_FULL; - char *search = buff; - unsigned long flag; int not = 0; - int found = 0; - - flag = enable ? 
FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; /* blank or '*' mean the same */ if (strcmp(buff, "*") == 0) @@ -1852,32 +2390,7 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable) not = 1; } - if (strlen(buff)) { - type = filter_parse_regex(buff, strlen(buff), &search, ¬); - search_len = strlen(search); - } - - mutex_lock(&ftrace_lock); - do_for_each_ftrace_rec(pg, rec) { - - if (rec->flags & FTRACE_FL_FAILED) - continue; - - if (ftrace_match_module_record(rec, mod, - search, search_len, type)) { - if (not) - rec->flags &= ~flag; - else - rec->flags |= flag; - found = 1; - } - if (enable && (rec->flags & FTRACE_FL_FILTER)) - ftrace_filtered = 1; - - } while_for_each_ftrace_rec(); - mutex_unlock(&ftrace_lock); - - return found; + return match_records(hash, buff, strlen(buff), mod, not); } /* @@ -1888,7 +2401,10 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable) static int ftrace_mod_callback(char *func, char *cmd, char *param, int enable) { + struct ftrace_ops *ops = &global_ops; + struct ftrace_hash *hash; char *mod; + int ret = -EINVAL; /* * cmd == 'mod' because we only registered this func @@ -1900,15 +2416,24 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable) /* we must have a module name */ if (!param) - return -EINVAL; + return ret; mod = strsep(¶m, ":"); if (!strlen(mod)) - return -EINVAL; + return ret; - if (ftrace_match_module_records(func, mod, enable)) - return 0; - return -EINVAL; + if (enable) + hash = ops->filter_hash; + else + hash = ops->notrace_hash; + + ret = ftrace_match_module_records(hash, func, mod); + if (!ret) + ret = -EINVAL; + if (ret < 0) + return ret; + + return 0; } static struct ftrace_func_command ftrace_mod_cmd = { @@ -1959,6 +2484,7 @@ static int ftrace_probe_registered; static void __enable_ftrace_function_probe(void) { + int ret; int i; if (ftrace_probe_registered) @@ -1973,13 +2499,16 @@ static void __enable_ftrace_function_probe(void) if (i == FTRACE_FUNC_HASHSIZE) return; - __register_ftrace_function(&trace_probe_ops); - ftrace_startup(0); + ret = __register_ftrace_function(&trace_probe_ops); + if (!ret) + ftrace_startup(&trace_probe_ops, 0); + ftrace_probe_registered = 1; } static void __disable_ftrace_function_probe(void) { + int ret; int i; if (!ftrace_probe_registered) @@ -1992,8 +2521,10 @@ static void __disable_ftrace_function_probe(void) } /* no more funcs left */ - __unregister_ftrace_function(&trace_probe_ops); - ftrace_shutdown(0); + ret = __unregister_ftrace_function(&trace_probe_ops); + if (!ret) + ftrace_shutdown(&trace_probe_ops, 0); + ftrace_probe_registered = 0; } @@ -2029,12 +2560,13 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, return -EINVAL; mutex_lock(&ftrace_lock); - do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & FTRACE_FL_FAILED) - continue; + if (unlikely(ftrace_disabled)) + goto out_unlock; + + do_for_each_ftrace_rec(pg, rec) { - if (!ftrace_match_record(rec, search, len, type)) + if (!ftrace_match_record(rec, NULL, search, len, type)) continue; entry = kmalloc(sizeof(*entry), GFP_KERNEL); @@ -2195,18 +2727,22 @@ int unregister_ftrace_command(struct ftrace_func_command *cmd) return ret; } -static int ftrace_process_regex(char *buff, int len, int enable) +static int ftrace_process_regex(struct ftrace_hash *hash, + char *buff, int len, int enable) { char *func, *command, *next = buff; struct ftrace_func_command *p; - int ret = -EINVAL; + int ret; func = strsep(&next, ":"); if (!next) { - if (ftrace_match_records(func, len, enable)) - 
return 0; - return ret; + ret = ftrace_match_records(hash, func, len); + if (!ret) + ret = -EINVAL; + if (ret < 0) + return ret; + return 0; } /* command found */ @@ -2239,6 +2775,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, mutex_lock(&ftrace_regex_lock); + ret = -ENODEV; + if (unlikely(ftrace_disabled)) + goto out_unlock; + if (file->f_mode & FMODE_READ) { struct seq_file *m = file->private_data; iter = m->private; @@ -2250,7 +2790,7 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, if (read >= 0 && trace_parser_loaded(parser) && !trace_parser_cont(parser)) { - ret = ftrace_process_regex(parser->buffer, + ret = ftrace_process_regex(iter->hash, parser->buffer, parser->idx, enable); trace_parser_clear(parser); if (ret) @@ -2278,22 +2818,49 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf, return ftrace_regex_write(file, ubuf, cnt, ppos, 0); } -static void -ftrace_set_regex(unsigned char *buf, int len, int reset, int enable) +static int +ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, + int reset, int enable) { + struct ftrace_hash **orig_hash; + struct ftrace_hash *hash; + int ret; + + /* All global ops uses the global ops filters */ + if (ops->flags & FTRACE_OPS_FL_GLOBAL) + ops = &global_ops; + if (unlikely(ftrace_disabled)) - return; + return -ENODEV; + + if (enable) + orig_hash = &ops->filter_hash; + else + orig_hash = &ops->notrace_hash; + + hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); + if (!hash) + return -ENOMEM; mutex_lock(&ftrace_regex_lock); if (reset) - ftrace_filter_reset(enable); + ftrace_filter_reset(hash); if (buf) - ftrace_match_records(buf, len, enable); + ftrace_match_records(hash, buf, len); + + mutex_lock(&ftrace_lock); + ret = ftrace_hash_move(orig_hash, hash); + mutex_unlock(&ftrace_lock); + mutex_unlock(&ftrace_regex_lock); + + free_ftrace_hash(hash); + return ret; } /** * ftrace_set_filter - set a function to filter on in ftrace + * @ops - the ops to set the filter with * @buf - the string that holds the function filter text. * @len - the length of the string. * @reset - non zero to reset all filters before applying this filter. @@ -2301,13 +2868,16 @@ ftrace_set_regex(unsigned char *buf, int len, int reset, int enable) * Filters denote which functions should be enabled when tracing is enabled. * If @buf is NULL and reset is set, all functions will be enabled for tracing. */ -void ftrace_set_filter(unsigned char *buf, int len, int reset) +void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset) { - ftrace_set_regex(buf, len, reset, 1); + ftrace_set_regex(ops, buf, len, reset, 1); } +EXPORT_SYMBOL_GPL(ftrace_set_filter); /** * ftrace_set_notrace - set a function to not trace in ftrace + * @ops - the ops to set the notrace filter with * @buf - the string that holds the function notrace text. * @len - the length of the string. * @reset - non zero to reset all filters before applying this filter. @@ -2316,10 +2886,44 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset) * is enabled. If @buf is NULL and reset is set, all functions will be enabled * for tracing. 
*/ -void ftrace_set_notrace(unsigned char *buf, int len, int reset) +void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset) { - ftrace_set_regex(buf, len, reset, 0); + ftrace_set_regex(ops, buf, len, reset, 0); } +EXPORT_SYMBOL_GPL(ftrace_set_notrace); +/** + * ftrace_set_filter - set a function to filter on in ftrace + * @ops - the ops to set the filter with + * @buf - the string that holds the function filter text. + * @len - the length of the string. + * @reset - non zero to reset all filters before applying this filter. + * + * Filters denote which functions should be enabled when tracing is enabled. + * If @buf is NULL and reset is set, all functions will be enabled for tracing. + */ +void ftrace_set_global_filter(unsigned char *buf, int len, int reset) +{ + ftrace_set_regex(&global_ops, buf, len, reset, 1); +} +EXPORT_SYMBOL_GPL(ftrace_set_global_filter); + +/** + * ftrace_set_notrace - set a function to not trace in ftrace + * @ops - the ops to set the notrace filter with + * @buf - the string that holds the function notrace text. + * @len - the length of the string. + * @reset - non zero to reset all filters before applying this filter. + * + * Notrace Filters denote which functions should not be enabled when tracing + * is enabled. If @buf is NULL and reset is set, all functions will be enabled + * for tracing. + */ +void ftrace_set_global_notrace(unsigned char *buf, int len, int reset) +{ + ftrace_set_regex(&global_ops, buf, len, reset, 0); +} +EXPORT_SYMBOL_GPL(ftrace_set_global_notrace); /* * command line interface to allow users to set filters on boot up. @@ -2370,22 +2974,23 @@ static void __init set_ftrace_early_graph(char *buf) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -static void __init set_ftrace_early_filter(char *buf, int enable) +static void __init +set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable) { char *func; while (buf) { func = strsep(&buf, ","); - ftrace_set_regex(func, strlen(func), 0, enable); + ftrace_set_regex(ops, func, strlen(func), 0, enable); } } static void __init set_ftrace_early_filters(void) { if (ftrace_filter_buf[0]) - set_ftrace_early_filter(ftrace_filter_buf, 1); + set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1); if (ftrace_notrace_buf[0]) - set_ftrace_early_filter(ftrace_notrace_buf, 0); + set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (ftrace_graph_buf[0]) set_ftrace_early_graph(ftrace_graph_buf); @@ -2393,11 +2998,14 @@ static void __init set_ftrace_early_filters(void) } static int -ftrace_regex_release(struct inode *inode, struct file *file, int enable) +ftrace_regex_release(struct inode *inode, struct file *file) { struct seq_file *m = (struct seq_file *)file->private_data; struct ftrace_iterator *iter; + struct ftrace_hash **orig_hash; struct trace_parser *parser; + int filter_hash; + int ret; mutex_lock(&ftrace_regex_lock); if (file->f_mode & FMODE_READ) { @@ -2410,33 +3018,41 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) parser = &iter->parser; if (trace_parser_loaded(parser)) { parser->buffer[parser->idx] = 0; - ftrace_match_records(parser->buffer, parser->idx, enable); + ftrace_match_records(iter->hash, parser->buffer, parser->idx); } - mutex_lock(&ftrace_lock); - if (ftrace_start_up && ftrace_enabled) - ftrace_run_update_code(FTRACE_ENABLE_CALLS); - mutex_unlock(&ftrace_lock); - trace_parser_put(parser); + + if (file->f_mode & FMODE_WRITE) { + filter_hash = !!(iter->flags & 
FTRACE_ITER_FILTER); + + if (filter_hash) + orig_hash = &iter->ops->filter_hash; + else + orig_hash = &iter->ops->notrace_hash; + + mutex_lock(&ftrace_lock); + /* + * Remove the current set, update the hash and add + * them back. + */ + ftrace_hash_rec_disable(iter->ops, filter_hash); + ret = ftrace_hash_move(orig_hash, iter->hash); + if (!ret) { + ftrace_hash_rec_enable(iter->ops, filter_hash); + if (iter->ops->flags & FTRACE_OPS_FL_ENABLED + && ftrace_enabled) + ftrace_run_update_code(FTRACE_ENABLE_CALLS); + } + mutex_unlock(&ftrace_lock); + } + free_ftrace_hash(iter->hash); kfree(iter); mutex_unlock(&ftrace_regex_lock); return 0; } -static int -ftrace_filter_release(struct inode *inode, struct file *file) -{ - return ftrace_regex_release(inode, file, 1); -} - -static int -ftrace_notrace_release(struct inode *inode, struct file *file) -{ - return ftrace_regex_release(inode, file, 0); -} - static const struct file_operations ftrace_avail_fops = { .open = ftrace_avail_open, .read = seq_read, @@ -2444,8 +3060,8 @@ static const struct file_operations ftrace_avail_fops = { .release = seq_release_private, }; -static const struct file_operations ftrace_failures_fops = { - .open = ftrace_failures_open, +static const struct file_operations ftrace_enabled_fops = { + .open = ftrace_enabled_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, @@ -2456,7 +3072,7 @@ static const struct file_operations ftrace_filter_fops = { .read = seq_read, .write = ftrace_filter_write, .llseek = ftrace_regex_lseek, - .release = ftrace_filter_release, + .release = ftrace_regex_release, }; static const struct file_operations ftrace_notrace_fops = { @@ -2464,7 +3080,7 @@ static const struct file_operations ftrace_notrace_fops = { .read = seq_read, .write = ftrace_notrace_write, .llseek = ftrace_regex_lseek, - .release = ftrace_notrace_release, + .release = ftrace_regex_release, }; #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -2573,9 +3189,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) bool exists; int i; - if (ftrace_disabled) - return -ENODEV; - /* decode regex */ type = filter_parse_regex(buffer, strlen(buffer), &search, ¬); if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS) @@ -2584,12 +3197,18 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) search_len = strlen(search); mutex_lock(&ftrace_lock); + + if (unlikely(ftrace_disabled)) { + mutex_unlock(&ftrace_lock); + return -ENODEV; + } + do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) + if (rec->flags & FTRACE_FL_FREE) continue; - if (ftrace_match_record(rec, search, search_len, type)) { + if (ftrace_match_record(rec, NULL, search, search_len, type)) { /* if it is in the array */ exists = false; for (i = 0; i < *idx; i++) { @@ -2679,8 +3298,8 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) trace_create_file("available_filter_functions", 0444, d_tracer, NULL, &ftrace_avail_fops); - trace_create_file("failures", 0444, - d_tracer, NULL, &ftrace_failures_fops); + trace_create_file("enabled_functions", 0444, + d_tracer, NULL, &ftrace_enabled_fops); trace_create_file("set_ftrace_filter", 0644, d_tracer, NULL, &ftrace_filter_fops); @@ -2703,7 +3322,6 @@ static int ftrace_process_locs(struct module *mod, { unsigned long *p; unsigned long addr; - unsigned long flags; mutex_lock(&ftrace_lock); p = start; @@ -2720,10 +3338,7 @@ static int ftrace_process_locs(struct module *mod, ftrace_record_ip(addr); } - /* disable interrupts to prevent kstop machine */ - 
local_irq_save(flags); ftrace_update_code(mod); - local_irq_restore(flags); mutex_unlock(&ftrace_lock); return 0; @@ -2735,10 +3350,11 @@ void ftrace_release_mod(struct module *mod) struct dyn_ftrace *rec; struct ftrace_page *pg; + mutex_lock(&ftrace_lock); + if (ftrace_disabled) - return; + goto out_unlock; - mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { if (within_module_core(rec->ip, mod)) { /* @@ -2749,6 +3365,7 @@ void ftrace_release_mod(struct module *mod) ftrace_free_rec(rec); } } while_for_each_ftrace_rec(); + out_unlock: mutex_unlock(&ftrace_lock); } @@ -2835,6 +3452,10 @@ void __init ftrace_init(void) #else +static struct ftrace_ops global_ops = { + .func = ftrace_stub, +}; + static int __init ftrace_nodyn_init(void) { ftrace_enabled = 1; @@ -2845,12 +3466,38 @@ device_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } /* Keep as macros so we do not need to define the commands */ -# define ftrace_startup(command) do { } while (0) -# define ftrace_shutdown(command) do { } while (0) +# define ftrace_startup(ops, command) do { } while (0) +# define ftrace_shutdown(ops, command) do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) + +static inline int +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) +{ + return 1; +} + #endif /* CONFIG_DYNAMIC_FTRACE */ +static void +ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) +{ + struct ftrace_ops *op; + + /* + * Some of the ops may be dynamically allocated, + * they must be freed after a synchronize_sched(). + */ + preempt_disable_notrace(); + op = rcu_dereference_raw(ftrace_ops_list); + while (op != &ftrace_list_end) { + if (ftrace_ops_test(op, ip)) + op->func(ip, parent_ip); + op = rcu_dereference_raw(op->next); + }; + preempt_enable_notrace(); +} + static void clear_ftrace_swapper(void) { struct task_struct *p; @@ -3143,19 +3790,23 @@ void ftrace_kill(void) */ int register_ftrace_function(struct ftrace_ops *ops) { - int ret; - - if (unlikely(ftrace_disabled)) - return -1; + int ret = -1; mutex_lock(&ftrace_lock); + if (unlikely(ftrace_disabled)) + goto out_unlock; + ret = __register_ftrace_function(ops); - ftrace_startup(0); + if (!ret) + ftrace_startup(ops, 0); + + out_unlock: mutex_unlock(&ftrace_lock); return ret; } +EXPORT_SYMBOL_GPL(register_ftrace_function); /** * unregister_ftrace_function - unregister a function for profiling. 
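The hunks above make ftrace_ops the unit of registration: each ops carries its own filter hashes, register_ftrace_function()/unregister_ftrace_function() are now exported, and ftrace_ops_list_func() walks the registered ops and calls only the callbacks whose filter matches the traced ip. A minimal sketch of how a caller might use this per-ops interface follows; it is illustrative only and not part of the patch. The my_* names are invented, while the two-argument callback, ftrace_set_filter(&ops, buf, len, reset) and register_ftrace_function() follow the prototypes visible in this diff.

#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/string.h>

/* Called only for functions that match this ops' own filter_hash. */
static void my_trace_func(unsigned long ip, unsigned long parent_ip)
{
	/* would normally be notrace / built without -pg to avoid recursion */
}

/* No FTRACE_OPS_FL_GLOBAL flag: this ops keeps private filter hashes. */
static struct ftrace_ops my_ops = {
	.func = my_trace_func,
};

static int __init my_init(void)
{
	char *func = "schedule";	/* example target function */

	/* restrict this ops to schedule(); reset any earlier filter (reset=1) */
	ftrace_set_filter(&my_ops, func, strlen(func), 1);
	return register_ftrace_function(&my_ops);
}

static void __exit my_exit(void)
{
	unregister_ftrace_function(&my_ops);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");

Unlike the FTRACE_OPS_FL_GLOBAL ops used by the latency tracers later in this series, an ops registered this way is enabled and disabled through its own hashes, which is exactly what trace_selftest_ops() below exercises with test_probe1/2/3 and the dynamically allocated dyn_ops.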
@@ -3169,25 +3820,27 @@ int unregister_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_lock); ret = __unregister_ftrace_function(ops); - ftrace_shutdown(0); + if (!ret) + ftrace_shutdown(ops, 0); mutex_unlock(&ftrace_lock); return ret; } +EXPORT_SYMBOL_GPL(unregister_ftrace_function); int ftrace_enable_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret; - - if (unlikely(ftrace_disabled)) - return -ENODEV; + int ret = -ENODEV; mutex_lock(&ftrace_lock); - ret = proc_dointvec(table, write, buffer, lenp, ppos); + if (unlikely(ftrace_disabled)) + goto out; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) goto out; @@ -3199,11 +3852,11 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, ftrace_startup_sysctl(); /* we are starting ftrace again */ - if (ftrace_list != &ftrace_list_end) { - if (ftrace_list->next == &ftrace_list_end) - ftrace_trace_function = ftrace_list->func; + if (ftrace_ops_list != &ftrace_list_end) { + if (ftrace_ops_list->next == &ftrace_list_end) + ftrace_trace_function = ftrace_ops_list->func; else - ftrace_trace_function = ftrace_list_func; + ftrace_trace_function = ftrace_ops_list_func; } } else { @@ -3392,7 +4045,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, ftrace_graph_return = retfunc; ftrace_graph_entry = entryfunc; - ftrace_startup(FTRACE_START_FUNC_RET); + ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); out: mutex_unlock(&ftrace_lock); @@ -3409,7 +4062,7 @@ void unregister_ftrace_graph(void) ftrace_graph_active--; ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; - ftrace_shutdown(FTRACE_STOP_FUNC_RET); + ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET); unregister_pm_notifier(&ftrace_suspend_notifier); unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1cb49be7c7f..ee9c921d7f2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2014,9 +2014,10 @@ enum print_line_t print_trace_line(struct trace_iterator *iter) { enum print_line_t ret; - if (iter->lost_events) - trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", - iter->cpu, iter->lost_events); + if (iter->lost_events && + !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", + iter->cpu, iter->lost_events)) + return TRACE_TYPE_PARTIAL_LINE; if (iter->trace && iter->trace->print_line) { ret = iter->trace->print_line(iter); @@ -3230,6 +3231,14 @@ waitagain: if (iter->seq.len >= cnt) break; + + /* + * Setting the full flag means we reached the trace_seq buffer + * size and we should leave by partial output condition above. + * One of the trace_seq_* functions is not used properly. 
+ */ + WARN_ONCE(iter->seq.full, "full flag set for trace type %d", + iter->ent->type); } trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5e9dfc6286d..6b69c4bd306 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -419,6 +419,8 @@ extern void trace_find_cmdline(int pid, char comm[]); extern unsigned long ftrace_update_tot_cnt; #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func extern int DYN_FTRACE_TEST_NAME(void); +#define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2 +extern int DYN_FTRACE_TEST_NAME2(void); #endif extern int ring_buffer_expanded; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 16aee4d44e8..8d0e1cc4e97 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -149,11 +149,13 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = function_trace_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; static struct ftrace_ops trace_stack_ops __read_mostly = { .func = function_stack_trace_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; /* Our two options */ diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index a4969b47afc..c77424be284 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -153,6 +153,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = irqsoff_tracer_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 456be9063c2..cf535ccedc8 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -830,6 +830,9 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_event); enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags, struct trace_event *event) { + if (!trace_seq_printf(&iter->seq, "type: %d\n", iter->ent->type)) + return TRACE_TYPE_PARTIAL_LINE; + return TRACE_TYPE_HANDLED; } diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 2547d8813cf..dff763b7baf 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -32,7 +32,7 @@ static DEFINE_MUTEX(btrace_mutex); struct trace_bprintk_fmt { struct list_head list; - char fmt[0]; + const char *fmt; }; static inline struct trace_bprintk_fmt *lookup_format(const char *fmt) @@ -49,6 +49,7 @@ static void hold_module_trace_bprintk_format(const char **start, const char **end) { const char **iter; + char *fmt; mutex_lock(&btrace_mutex); for (iter = start; iter < end; iter++) { @@ -58,14 +59,18 @@ void hold_module_trace_bprintk_format(const char **start, const char **end) continue; } - tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt) - + strlen(*iter) + 1, GFP_KERNEL); - if (tb_fmt) { + tb_fmt = kmalloc(sizeof(*tb_fmt), GFP_KERNEL); + if (tb_fmt) + fmt = kmalloc(strlen(*iter) + 1, GFP_KERNEL); + if (tb_fmt && fmt) { list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list); - strcpy(tb_fmt->fmt, *iter); + strcpy(fmt, *iter); + tb_fmt->fmt = fmt; *iter = tb_fmt->fmt; - } else + } else { + kfree(tb_fmt); *iter = NULL; + } } mutex_unlock(&btrace_mutex); } @@ -84,6 +89,76 @@ static int module_trace_bprintk_format_notify(struct notifier_block *self, return 0; } +/* + * The debugfs/tracing/printk_formats file maps the addresses with + * the ASCII formats that are used in the bprintk events in the + * buffer. 
For userspace tools to be able to decode the events from + * the buffer, they need to be able to map the address with the format. + * + * The addresses of the bprintk formats are in their own section + * __trace_printk_fmt. But for modules we copy them into a link list. + * The code to print the formats and their addresses passes around the + * address of the fmt string. If the fmt address passed into the seq + * functions is within the kernel core __trace_printk_fmt section, then + * it simply uses the next pointer in the list. + * + * When the fmt pointer is outside the kernel core __trace_printk_fmt + * section, then we need to read the link list pointers. The trick is + * we pass the address of the string to the seq function just like + * we do for the kernel core formats. To get back the structure that + * holds the format, we simply use containerof() and then go to the + * next format in the list. + */ +static const char ** +find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos) +{ + struct trace_bprintk_fmt *mod_fmt; + + if (list_empty(&trace_bprintk_fmt_list)) + return NULL; + + /* + * v will point to the address of the fmt record from t_next + * v will be NULL from t_start. + * If this is the first pointer or called from start + * then we need to walk the list. + */ + if (!v || start_index == *pos) { + struct trace_bprintk_fmt *p; + + /* search the module list */ + list_for_each_entry(p, &trace_bprintk_fmt_list, list) { + if (start_index == *pos) + return &p->fmt; + start_index++; + } + /* pos > index */ + return NULL; + } + + /* + * v points to the address of the fmt field in the mod list + * structure that holds the module print format. + */ + mod_fmt = container_of(v, typeof(*mod_fmt), fmt); + if (mod_fmt->list.next == &trace_bprintk_fmt_list) + return NULL; + + mod_fmt = container_of(mod_fmt->list.next, typeof(*mod_fmt), list); + + return &mod_fmt->fmt; +} + +static void format_mod_start(void) +{ + mutex_lock(&btrace_mutex); +} + +static void format_mod_stop(void) +{ + mutex_unlock(&btrace_mutex); +} + #else /* !CONFIG_MODULES */ __init static int module_trace_bprintk_format_notify(struct notifier_block *self, @@ -91,6 +166,13 @@ module_trace_bprintk_format_notify(struct notifier_block *self, { return 0; } +static inline const char ** +find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos) +{ + return NULL; +} +static inline void format_mod_start(void) { } +static inline void format_mod_stop(void) { } #endif /* CONFIG_MODULES */ @@ -153,20 +235,33 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) } EXPORT_SYMBOL_GPL(__ftrace_vprintk); +static const char **find_next(void *v, loff_t *pos) +{ + const char **fmt = v; + int start_index; + + if (!fmt) + fmt = __start___trace_bprintk_fmt + *pos; + + start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt; + + if (*pos < start_index) + return fmt; + + return find_next_mod_format(start_index, v, fmt, pos); +} + static void * t_start(struct seq_file *m, loff_t *pos) { - const char **fmt = __start___trace_bprintk_fmt + *pos; - - if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt) - return NULL; - return fmt; + format_mod_start(); + return find_next(NULL, pos); } static void *t_next(struct seq_file *m, void * v, loff_t *pos) { (*pos)++; - return t_start(m, pos); + return find_next(v, pos); } static int t_show(struct seq_file *m, void *v) @@ -205,6 +300,7 @@ static int t_show(struct seq_file *m, void *v) static void t_stop(struct seq_file *m, 
void *p) { + format_mod_stop(); } static const struct seq_operations show_format_seq_ops = { diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 7319559ed59..f029dd4fd2c 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -129,6 +129,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = wakeup_tracer_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 659732eba07..288541f977f 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -101,6 +101,206 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret) #ifdef CONFIG_DYNAMIC_FTRACE +static int trace_selftest_test_probe1_cnt; +static void trace_selftest_test_probe1_func(unsigned long ip, + unsigned long pip) +{ + trace_selftest_test_probe1_cnt++; +} + +static int trace_selftest_test_probe2_cnt; +static void trace_selftest_test_probe2_func(unsigned long ip, + unsigned long pip) +{ + trace_selftest_test_probe2_cnt++; +} + +static int trace_selftest_test_probe3_cnt; +static void trace_selftest_test_probe3_func(unsigned long ip, + unsigned long pip) +{ + trace_selftest_test_probe3_cnt++; +} + +static int trace_selftest_test_global_cnt; +static void trace_selftest_test_global_func(unsigned long ip, + unsigned long pip) +{ + trace_selftest_test_global_cnt++; +} + +static int trace_selftest_test_dyn_cnt; +static void trace_selftest_test_dyn_func(unsigned long ip, + unsigned long pip) +{ + trace_selftest_test_dyn_cnt++; +} + +static struct ftrace_ops test_probe1 = { + .func = trace_selftest_test_probe1_func, +}; + +static struct ftrace_ops test_probe2 = { + .func = trace_selftest_test_probe2_func, +}; + +static struct ftrace_ops test_probe3 = { + .func = trace_selftest_test_probe3_func, +}; + +static struct ftrace_ops test_global = { + .func = trace_selftest_test_global_func, + .flags = FTRACE_OPS_FL_GLOBAL, +}; + +static void print_counts(void) +{ + printk("(%d %d %d %d %d) ", + trace_selftest_test_probe1_cnt, + trace_selftest_test_probe2_cnt, + trace_selftest_test_probe3_cnt, + trace_selftest_test_global_cnt, + trace_selftest_test_dyn_cnt); +} + +static void reset_counts(void) +{ + trace_selftest_test_probe1_cnt = 0; + trace_selftest_test_probe2_cnt = 0; + trace_selftest_test_probe3_cnt = 0; + trace_selftest_test_global_cnt = 0; + trace_selftest_test_dyn_cnt = 0; +} + +static int trace_selftest_ops(int cnt) +{ + int save_ftrace_enabled = ftrace_enabled; + struct ftrace_ops *dyn_ops; + char *func1_name; + char *func2_name; + int len1; + int len2; + int ret = -1; + + printk(KERN_CONT "PASSED\n"); + pr_info("Testing dynamic ftrace ops #%d: ", cnt); + + ftrace_enabled = 1; + reset_counts(); + + /* Handle PPC64 '.' name */ + func1_name = "*" __stringify(DYN_FTRACE_TEST_NAME); + func2_name = "*" __stringify(DYN_FTRACE_TEST_NAME2); + len1 = strlen(func1_name); + len2 = strlen(func2_name); + + /* + * Probe 1 will trace function 1. + * Probe 2 will trace function 2. + * Probe 3 will trace functions 1 and 2. 
+ */ + ftrace_set_filter(&test_probe1, func1_name, len1, 1); + ftrace_set_filter(&test_probe2, func2_name, len2, 1); + ftrace_set_filter(&test_probe3, func1_name, len1, 1); + ftrace_set_filter(&test_probe3, func2_name, len2, 0); + + register_ftrace_function(&test_probe1); + register_ftrace_function(&test_probe2); + register_ftrace_function(&test_probe3); + register_ftrace_function(&test_global); + + DYN_FTRACE_TEST_NAME(); + + print_counts(); + + if (trace_selftest_test_probe1_cnt != 1) + goto out; + if (trace_selftest_test_probe2_cnt != 0) + goto out; + if (trace_selftest_test_probe3_cnt != 1) + goto out; + if (trace_selftest_test_global_cnt == 0) + goto out; + + DYN_FTRACE_TEST_NAME2(); + + print_counts(); + + if (trace_selftest_test_probe1_cnt != 1) + goto out; + if (trace_selftest_test_probe2_cnt != 1) + goto out; + if (trace_selftest_test_probe3_cnt != 2) + goto out; + + /* Add a dynamic probe */ + dyn_ops = kzalloc(sizeof(*dyn_ops), GFP_KERNEL); + if (!dyn_ops) { + printk("MEMORY ERROR "); + goto out; + } + + dyn_ops->func = trace_selftest_test_dyn_func; + + register_ftrace_function(dyn_ops); + + trace_selftest_test_global_cnt = 0; + + DYN_FTRACE_TEST_NAME(); + + print_counts(); + + if (trace_selftest_test_probe1_cnt != 2) + goto out_free; + if (trace_selftest_test_probe2_cnt != 1) + goto out_free; + if (trace_selftest_test_probe3_cnt != 3) + goto out_free; + if (trace_selftest_test_global_cnt == 0) + goto out; + if (trace_selftest_test_dyn_cnt == 0) + goto out_free; + + DYN_FTRACE_TEST_NAME2(); + + print_counts(); + + if (trace_selftest_test_probe1_cnt != 2) + goto out_free; + if (trace_selftest_test_probe2_cnt != 2) + goto out_free; + if (trace_selftest_test_probe3_cnt != 4) + goto out_free; + + ret = 0; + out_free: + unregister_ftrace_function(dyn_ops); + kfree(dyn_ops); + + out: + /* Purposely unregister in the same order */ + unregister_ftrace_function(&test_probe1); + unregister_ftrace_function(&test_probe2); + unregister_ftrace_function(&test_probe3); + unregister_ftrace_function(&test_global); + + /* Make sure everything is off */ + reset_counts(); + DYN_FTRACE_TEST_NAME(); + DYN_FTRACE_TEST_NAME(); + + if (trace_selftest_test_probe1_cnt || + trace_selftest_test_probe2_cnt || + trace_selftest_test_probe3_cnt || + trace_selftest_test_global_cnt || + trace_selftest_test_dyn_cnt) + ret = -1; + + ftrace_enabled = save_ftrace_enabled; + + return ret; +} + /* Test dynamic code modification and ftrace filters */ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, struct trace_array *tr, @@ -131,7 +331,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); /* filter only on our function */ - ftrace_set_filter(func_name, strlen(func_name), 1); + ftrace_set_global_filter(func_name, strlen(func_name), 1); /* enable tracing */ ret = tracer_init(trace, tr); @@ -166,22 +366,30 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, /* check the trace buffer */ ret = trace_test_buffer(tr, &count); - trace->reset(tr); tracing_start(); /* we should only have one item */ if (!ret && count != 1) { + trace->reset(tr); printk(KERN_CONT ".. 
filter failed count=%ld ..", count); ret = -1; goto out; } + /* Test the ops with global tracing running */ + ret = trace_selftest_ops(1); + trace->reset(tr); + out: ftrace_enabled = save_ftrace_enabled; tracer_enabled = save_tracer_enabled; /* Enable tracing on all functions again */ - ftrace_set_filter(NULL, 0, 1); + ftrace_set_global_filter(NULL, 0, 1); + + /* Test the ops with global tracing off */ + if (!ret) + ret = trace_selftest_ops(2); return ret; } diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c index 54dd77cce5b..b4c475a0a48 100644 --- a/kernel/trace/trace_selftest_dynamic.c +++ b/kernel/trace/trace_selftest_dynamic.c @@ -5,3 +5,9 @@ int DYN_FTRACE_TEST_NAME(void) /* used to call mcount */ return 0; } + +int DYN_FTRACE_TEST_NAME2(void) +{ + /* used to call mcount */ + return 0; +} diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 4c5dead0c23..b0b53b8e4c2 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -133,6 +133,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = stack_trace_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; static ssize_t diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 68187af4889..b219f1449c5 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -251,9 +251,9 @@ static void set_tracepoint(struct tracepoint_entry **entry, { WARN_ON(strcmp((*entry)->name, elem->name) != 0); - if (elem->regfunc && !elem->state && active) + if (elem->regfunc && !jump_label_enabled(&elem->key) && active) elem->regfunc(); - else if (elem->unregfunc && elem->state && !active) + else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active) elem->unregfunc(); /* @@ -264,13 +264,10 @@ static void set_tracepoint(struct tracepoint_entry **entry, * is used. */ rcu_assign_pointer(elem->funcs, (*entry)->funcs); - if (!elem->state && active) { - jump_label_enable(&elem->state); - elem->state = active; - } else if (elem->state && !active) { - jump_label_disable(&elem->state); - elem->state = active; - } + if (active && !jump_label_enabled(&elem->key)) + jump_label_inc(&elem->key); + else if (!active && jump_label_enabled(&elem->key)) + jump_label_dec(&elem->key); } /* @@ -281,13 +278,11 @@ static void set_tracepoint(struct tracepoint_entry **entry, */ static void disable_tracepoint(struct tracepoint *elem) { - if (elem->unregfunc && elem->state) + if (elem->unregfunc && jump_label_enabled(&elem->key)) elem->unregfunc(); - if (elem->state) { - jump_label_disable(&elem->state); - elem->state = 0; - } + if (jump_label_enabled(&elem->key)) + jump_label_dec(&elem->key); rcu_assign_pointer(elem->funcs, NULL); } |
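For reference, the tracepoint.c hunk above drops the elem->state integer and the jump_label_enable()/jump_label_disable() pair in favour of a refcounted struct jump_label_key driven by jump_label_inc()/jump_label_dec() and queried with jump_label_enabled(). A rough sketch of that pattern is below; it is illustrative only, the my_feature_* names are invented, and the static_branch() fast-path test is assumed from the jump-label API of this same kernel generation rather than taken from this diff.

#include <linux/types.h>
#include <linux/jump_label.h>

static struct jump_label_key my_feature_key;	/* branch starts disabled */

static void my_feature_work(void)
{
	/* optional work, wanted only while the feature is active */
}

/* Hot path: with jump labels this compiles to a patched nop/jump, so the
 * disabled case does not pay for a real conditional branch. */
static void my_hot_path(void)
{
	if (static_branch(&my_feature_key))
		my_feature_work();
}

/* Slow path: mirror set_tracepoint()/disable_tracepoint() above and hold
 * at most one reference, toggled by the current active state. */
static void my_feature_set_active(bool active)
{
	if (active && !jump_label_enabled(&my_feature_key))
		jump_label_inc(&my_feature_key);
	else if (!active && jump_label_enabled(&my_feature_key))
		jump_label_dec(&my_feature_key);
}

Keeping the enabled state inside the jump_label_key itself is what lets the patch delete the separate elem->state field while preserving the regfunc/unregfunc call points.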