diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/exit.c | 2 | ||||
-rw-r--r-- | kernel/irq/devres.c | 16 | ||||
-rw-r--r-- | kernel/irq/handle.c | 50 | ||||
-rw-r--r-- | kernel/irq/manage.c | 189 | ||||
-rw-r--r-- | kernel/kprobes.c | 281 | ||||
-rw-r--r-- | kernel/module.c | 3 | ||||
-rw-r--r-- | kernel/sysctl.c | 23 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 7 | ||||
-rw-r--r-- | kernel/trace/trace.c | 21 | ||||
-rw-r--r-- | kernel/trace/trace.h | 2 | ||||
-rw-r--r-- | kernel/trace/trace_export.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_sched_switch.c | 3 | ||||
-rw-r--r-- | kernel/trace/trace_sched_wakeup.c | 8 |
14 files changed, 494 insertions, 115 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index 32cbf2607cb..abf9cf3b95c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -923,6 +923,8 @@ NORET_TYPE void do_exit(long code) schedule(); } + exit_irq_thread(); + exit_signals(tsk); /* sets PF_EXITING */ /* * tsk->flags are checked in the futex code to protect against diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 38a25b8d8bf..d06df9c41cb 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -26,10 +26,12 @@ static int devm_irq_match(struct device *dev, void *res, void *data) } /** - * devm_request_irq - allocate an interrupt line for a managed device + * devm_request_threaded_irq - allocate an interrupt line for a managed device * @dev: device to request interrupt for * @irq: Interrupt line to allocate * @handler: Function to be called when the IRQ occurs + * @thread_fn: function to be called in a threaded interrupt context. NULL + * for devices which handle everything in @handler * @irqflags: Interrupt type flags * @devname: An ascii name for the claiming device * @dev_id: A cookie passed back to the handler function @@ -42,9 +44,10 @@ static int devm_irq_match(struct device *dev, void *res, void *data) * If an IRQ allocated with this function needs to be freed * separately, dev_free_irq() must be used. */ -int devm_request_irq(struct device *dev, unsigned int irq, - irq_handler_t handler, unsigned long irqflags, - const char *devname, void *dev_id) +int devm_request_threaded_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, irq_handler_t thread_fn, + unsigned long irqflags, const char *devname, + void *dev_id) { struct irq_devres *dr; int rc; @@ -54,7 +57,8 @@ int devm_request_irq(struct device *dev, unsigned int irq, if (!dr) return -ENOMEM; - rc = request_irq(irq, handler, irqflags, devname, dev_id); + rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname, + dev_id); if (rc) { devres_free(dr); return rc; @@ -66,7 +70,7 @@ int devm_request_irq(struct device *dev, unsigned int irq, return 0; } -EXPORT_SYMBOL(devm_request_irq); +EXPORT_SYMBOL(devm_request_threaded_irq); /** * devm_free_irq - free an interrupt diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 343acecae62..d82142be8dd 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -339,6 +339,15 @@ irqreturn_t no_action(int cpl, void *dev_id) return IRQ_NONE; } +static void warn_no_thread(unsigned int irq, struct irqaction *action) +{ + if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags)) + return; + + printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD " + "but no thread function available.", irq, action->name); +} + DEFINE_TRACE(irq_handler_entry); DEFINE_TRACE(irq_handler_exit); @@ -363,8 +372,47 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) trace_irq_handler_entry(irq, action); ret = action->handler(irq, action->dev_id); trace_irq_handler_exit(irq, action, ret); - if (ret == IRQ_HANDLED) + + switch (ret) { + case IRQ_WAKE_THREAD: + /* + * Set result to handled so the spurious check + * does not trigger. + */ + ret = IRQ_HANDLED; + + /* + * Catch drivers which return WAKE_THREAD but + * did not set up a thread function + */ + if (unlikely(!action->thread_fn)) { + warn_no_thread(irq, action); + break; + } + + /* + * Wake up the handler thread for this + * action. In case the thread crashed and was + * killed we just pretend that we handled the + * interrupt. The hardirq handler above has + * disabled the device interrupt, so no irq + * storm is lurking. + */ + if (likely(!test_bit(IRQTF_DIED, + &action->thread_flags))) { + set_bit(IRQTF_RUNTHREAD, &action->thread_flags); + wake_up_process(action->thread); + } + + /* Fall through to add to randomness */ + case IRQ_HANDLED: status |= action->flags; + break; + + default: + break; + } + retval |= ret; action = action->next; } while (action); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1516ab77355..7e2e7dd4cd2 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -8,16 +8,15 @@ */ #include <linux/irq.h> +#include <linux/kthread.h> #include <linux/module.h> #include <linux/random.h> #include <linux/interrupt.h> #include <linux/slab.h> +#include <linux/sched.h> #include "internals.h" -#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) -cpumask_var_t irq_default_affinity; - /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) * @irq: interrupt number to wait for @@ -53,9 +52,18 @@ void synchronize_irq(unsigned int irq) /* Oops, that failed? */ } while (status & IRQ_INPROGRESS); + + /* + * We made sure that no hardirq handler is running. Now verify + * that no threaded handlers are active. + */ + wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active)); } EXPORT_SYMBOL(synchronize_irq); +#ifdef CONFIG_SMP +cpumask_var_t irq_default_affinity; + /** * irq_can_set_affinity - Check if the affinity of a given irq can be set * @irq: Interrupt to check @@ -72,6 +80,18 @@ int irq_can_set_affinity(unsigned int irq) return 1; } +static void +irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask) +{ + struct irqaction *action = desc->action; + + while (action) { + if (action->thread) + set_cpus_allowed_ptr(action->thread, cpumask); + action = action->next; + } +} + /** * irq_set_affinity - Set the irq affinity of a given irq * @irq: Interrupt to set affinity @@ -100,6 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) cpumask_copy(desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); #endif + irq_set_thread_affinity(desc, cpumask); desc->status |= IRQ_AFFINITY_SET; spin_unlock_irqrestore(&desc->lock, flags); return 0; @@ -150,6 +171,8 @@ int irq_select_affinity_usr(unsigned int irq) spin_lock_irqsave(&desc->lock, flags); ret = setup_affinity(irq, desc); + if (!ret) + irq_set_thread_affinity(desc, desc->affinity); spin_unlock_irqrestore(&desc->lock, flags); return ret; @@ -401,6 +424,90 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, return ret; } +static int irq_wait_for_interrupt(struct irqaction *action) +{ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + + if (test_and_clear_bit(IRQTF_RUNTHREAD, + &action->thread_flags)) { + __set_current_state(TASK_RUNNING); + return 0; + } + schedule(); + } + return -1; +} + +/* + * Interrupt handler thread + */ +static int irq_thread(void *data) +{ + struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; + struct irqaction *action = data; + struct irq_desc *desc = irq_to_desc(action->irq); + int wake; + + sched_setscheduler(current, SCHED_FIFO, ¶m); + current->irqaction = action; + + while (!irq_wait_for_interrupt(action)) { + + atomic_inc(&desc->threads_active); + + spin_lock_irq(&desc->lock); + if (unlikely(desc->status & IRQ_DISABLED)) { + /* + * CHECKME: We might need a dedicated + * IRQ_THREAD_PENDING flag here, which + * retriggers the thread in check_irq_resend() + * but AFAICT IRQ_PENDING should be fine as it + * retriggers the interrupt itself --- tglx + */ + desc->status |= IRQ_PENDING; + spin_unlock_irq(&desc->lock); + } else { + spin_unlock_irq(&desc->lock); + + action->thread_fn(action->irq, action->dev_id); + } + + wake = atomic_dec_and_test(&desc->threads_active); + + if (wake && waitqueue_active(&desc->wait_for_threads)) + wake_up(&desc->wait_for_threads); + } + + /* + * Clear irqaction. Otherwise exit_irq_thread() would make + * fuzz about an active irq thread going into nirvana. + */ + current->irqaction = NULL; + return 0; +} + +/* + * Called from do_exit() + */ +void exit_irq_thread(void) +{ + struct task_struct *tsk = current; + + if (!tsk->irqaction) + return; + + printk(KERN_ERR + "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", + tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq); + + /* + * Set the THREAD DIED flag to prevent further wakeups of the + * soon to be gone threaded handler. + */ + set_bit(IRQTF_DIED, &tsk->irqaction->flags); +} + /* * Internal function to register an irqaction - typically used to * allocate special interrupts that are part of the architecture. @@ -437,6 +544,26 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } /* + * Threaded handler ? + */ + if (new->thread_fn) { + struct task_struct *t; + + t = kthread_create(irq_thread, new, "irq/%d-%s", irq, + new->name); + if (IS_ERR(t)) + return PTR_ERR(t); + /* + * We keep the reference to the task struct even if + * the thread dies to avoid that the interrupt code + * references an already freed task_struct. + */ + get_task_struct(t); + new->thread = t; + wake_up_process(t); + } + + /* * The following block of code has to be executed atomically */ spin_lock_irqsave(&desc->lock, flags); @@ -473,15 +600,15 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (!shared) { irq_chip_set_defaults(desc->chip); + init_waitqueue_head(&desc->wait_for_threads); + /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { ret = __irq_set_trigger(desc, irq, new->flags & IRQF_TRIGGER_MASK); - if (ret) { - spin_unlock_irqrestore(&desc->lock, flags); - return ret; - } + if (ret) + goto out_thread; } else compat_irq_chip_set_default_handler(desc); #if defined(CONFIG_IRQ_PER_CPU) @@ -549,8 +676,19 @@ mismatch: dump_stack(); } #endif + ret = -EBUSY; + +out_thread: spin_unlock_irqrestore(&desc->lock, flags); - return -EBUSY; + if (new->thread) { + struct task_struct *t = new->thread; + + new->thread = NULL; + if (likely(!test_bit(IRQTF_DIED, &new->thread_flags))) + kthread_stop(t); + put_task_struct(t); + } + return ret; } /** @@ -576,6 +714,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) { struct irq_desc *desc = irq_to_desc(irq); struct irqaction *action, **action_ptr; + struct task_struct *irqthread; unsigned long flags; WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); @@ -622,6 +761,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) else desc->chip->disable(irq); } + + irqthread = action->thread; + action->thread = NULL; + spin_unlock_irqrestore(&desc->lock, flags); unregister_handler_proc(irq, action); @@ -629,6 +772,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) /* Make sure it's not being used on another CPU: */ synchronize_irq(irq); + if (irqthread) { + if (!test_bit(IRQTF_DIED, &action->thread_flags)) + kthread_stop(irqthread); + put_task_struct(irqthread); + } + #ifdef CONFIG_DEBUG_SHIRQ /* * It's a shared IRQ -- the driver ought to be prepared for an IRQ @@ -681,9 +830,12 @@ void free_irq(unsigned int irq, void *dev_id) EXPORT_SYMBOL(free_irq); /** - * request_irq - allocate an interrupt line + * request_threaded_irq - allocate an interrupt line * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs + * @handler: Function to be called when the IRQ occurs. + * Primary handler for threaded interrupts + * @thread_fn: Function called from the irq handler thread + * If NULL, no irq thread is created * @irqflags: Interrupt type flags * @devname: An ascii name for the claiming device * @dev_id: A cookie passed back to the handler function @@ -695,6 +847,15 @@ EXPORT_SYMBOL(free_irq); * raises, you must take care both to initialise your hardware * and to set up the interrupt handler in the right order. * + * If you want to set up a threaded irq handler for your device + * then you need to supply @handler and @thread_fn. @handler ist + * still called in hard interrupt context and has to check + * whether the interrupt originates from the device. If yes it + * needs to disable the interrupt on the device and return + * IRQ_THREAD_WAKE which will wake up the handler thread and run + * @thread_fn. This split handler design is necessary to support + * shared interrupts. + * * Dev_id must be globally unique. Normally the address of the * device data structure is used as the cookie. Since the handler * receives this value it makes sense to use it. @@ -710,8 +871,9 @@ EXPORT_SYMBOL(free_irq); * IRQF_TRIGGER_* Specify active edge(s) or level * */ -int request_irq(unsigned int irq, irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) +int request_threaded_irq(unsigned int irq, irq_handler_t handler, + irq_handler_t thread_fn, unsigned long irqflags, + const char *devname, void *dev_id) { struct irqaction *action; struct irq_desc *desc; @@ -759,6 +921,7 @@ int request_irq(unsigned int irq, irq_handler_t handler, return -ENOMEM; action->handler = handler; + action->thread_fn = thread_fn; action->flags = irqflags; action->name = devname; action->dev_id = dev_id; @@ -788,4 +951,4 @@ int request_irq(unsigned int irq, irq_handler_t handler, #endif return retval; } -EXPORT_SYMBOL(request_irq); +EXPORT_SYMBOL(request_threaded_irq); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5016bfb682b..a5e74ddee0e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -68,7 +68,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; /* NOTE: change this value only with kprobe_mutex held */ -static bool kprobe_enabled; +static bool kprobes_all_disarmed; static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; @@ -328,7 +328,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) struct kprobe *kp; list_for_each_entry_rcu(kp, &p->list, list) { - if (kp->pre_handler && !kprobe_gone(kp)) { + if (kp->pre_handler && likely(!kprobe_disabled(kp))) { set_kprobe_instance(kp); if (kp->pre_handler(kp, regs)) return 1; @@ -344,7 +344,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, struct kprobe *kp; list_for_each_entry_rcu(kp, &p->list, list) { - if (kp->post_handler && !kprobe_gone(kp)) { + if (kp->post_handler && likely(!kprobe_disabled(kp))) { set_kprobe_instance(kp); kp->post_handler(kp, regs, flags); reset_kprobe_instance(); @@ -518,20 +518,28 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) } /* -* Add the new probe to old_p->list. Fail if this is the +* Add the new probe to ap->list. Fail if this is the * second jprobe at the address - two jprobes can't coexist */ -static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) +static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) { + BUG_ON(kprobe_gone(ap) || kprobe_gone(p)); if (p->break_handler) { - if (old_p->break_handler) + if (ap->break_handler) return -EEXIST; - list_add_tail_rcu(&p->list, &old_p->list); - old_p->break_handler = aggr_break_handler; + list_add_tail_rcu(&p->list, &ap->list); + ap->break_handler = aggr_break_handler; } else - list_add_rcu(&p->list, &old_p->list); - if (p->post_handler && !old_p->post_handler) - old_p->post_handler = aggr_post_handler; + list_add_rcu(&p->list, &ap->list); + if (p->post_handler && !ap->post_handler) + ap->post_handler = aggr_post_handler; + + if (kprobe_disabled(ap) && !kprobe_disabled(p)) { + ap->flags &= ~KPROBE_FLAG_DISABLED; + if (!kprobes_all_disarmed) + /* Arm the breakpoint again. */ + arch_arm_kprobe(ap); + } return 0; } @@ -544,6 +552,7 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) copy_kprobe(p, ap); flush_insn_slot(ap); ap->addr = p->addr; + ap->flags = p->flags; ap->pre_handler = aggr_pre_handler; ap->fault_handler = aggr_fault_handler; /* We don't care the kprobe which has gone. */ @@ -566,44 +575,59 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) { int ret = 0; - struct kprobe *ap; + struct kprobe *ap = old_p; - if (kprobe_gone(old_p)) { + if (old_p->pre_handler != aggr_pre_handler) { + /* If old_p is not an aggr_probe, create new aggr_kprobe. */ + ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); + if (!ap) + return -ENOMEM; + add_aggr_kprobe(ap, old_p); + } + + if (kprobe_gone(ap)) { /* * Attempting to insert new probe at the same location that * had a probe in the module vaddr area which already * freed. So, the instruction slot has already been * released. We need a new slot for the new probe. */ - ret = arch_prepare_kprobe(old_p); + ret = arch_prepare_kprobe(ap); if (ret) + /* + * Even if fail to allocate new slot, don't need to + * free aggr_probe. It will be used next time, or + * freed by unregister_kprobe. + */ return ret; - } - if (old_p->pre_handler == aggr_pre_handler) { - copy_kprobe(old_p, p); - ret = add_new_kprobe(old_p, p); - ap = old_p; - } else { - ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); - if (!ap) { - if (kprobe_gone(old_p)) - arch_remove_kprobe(old_p); - return -ENOMEM; - } - add_aggr_kprobe(ap, old_p); - copy_kprobe(ap, p); - ret = add_new_kprobe(ap, p); - } - if (kprobe_gone(old_p)) { + /* - * If the old_p has gone, its breakpoint has been disarmed. - * We have to arm it again after preparing real kprobes. + * Clear gone flag to prevent allocating new slot again, and + * set disabled flag because it is not armed yet. */ - ap->flags &= ~KPROBE_FLAG_GONE; - if (kprobe_enabled) - arch_arm_kprobe(ap); + ap->flags = (ap->flags & ~KPROBE_FLAG_GONE) + | KPROBE_FLAG_DISABLED; } - return ret; + + copy_kprobe(ap, p); + return add_new_kprobe(ap, p); +} + +/* Try to disable aggr_kprobe, and return 1 if succeeded.*/ +static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p) +{ + struct kprobe *kp; + + list_for_each_entry_rcu(kp, &p->list, list) { + if (!kprobe_disabled(kp)) + /* + * There is an active probe on the list. + * We can't disable aggr_kprobe. + */ + return 0; + } + p->flags |= KPROBE_FLAG_DISABLED; + return 1; } static int __kprobes in_kprobes_functions(unsigned long addr) @@ -664,7 +688,9 @@ int __kprobes register_kprobe(struct kprobe *p) return -EINVAL; } - p->flags = 0; + /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ + p->flags &= KPROBE_FLAG_DISABLED; + /* * Check if are we probing a module. */ @@ -709,7 +735,7 @@ int __kprobes register_kprobe(struct kprobe *p) hlist_add_head_rcu(&p->hlist, &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); - if (kprobe_enabled) + if (!kprobes_all_disarmed && !kprobe_disabled(p)) arch_arm_kprobe(p); out_unlock_text: @@ -722,26 +748,39 @@ out: return ret; } +EXPORT_SYMBOL_GPL(register_kprobe); -/* - * Unregister a kprobe without a scheduler synchronization. - */ -static int __kprobes __unregister_kprobe_top(struct kprobe *p) +/* Check passed kprobe is valid and return kprobe in kprobe_table. */ +static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) { struct kprobe *old_p, *list_p; old_p = get_kprobe(p->addr); if (unlikely(!old_p)) - return -EINVAL; + return NULL; if (p != old_p) { list_for_each_entry_rcu(list_p, &old_p->list, list) if (list_p == p) /* kprobe p is a valid probe */ - goto valid_p; - return -EINVAL; + goto valid; + return NULL; } -valid_p: +valid: + return old_p; +} + +/* + * Unregister a kprobe without a scheduler synchronization. + */ +static int __kprobes __unregister_kprobe_top(struct kprobe *p) +{ + struct kprobe *old_p, *list_p; + + old_p = __get_valid_kprobe(p); + if (old_p == NULL) + return -EINVAL; + if (old_p == p || (old_p->pre_handler == aggr_pre_handler && list_is_singular(&old_p->list))) { @@ -750,7 +789,7 @@ valid_p: * enabled and not gone - otherwise, the breakpoint would * already have been removed. We save on flushing icache. */ - if (kprobe_enabled && !kprobe_gone(old_p)) { + if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) { mutex_lock(&text_mutex); arch_disarm_kprobe(p); mutex_unlock(&text_mutex); @@ -768,6 +807,11 @@ valid_p: } noclean: list_del_rcu(&p->list); + if (!kprobe_disabled(old_p)) { + try_to_disable_aggr_kprobe(old_p); + if (!kprobes_all_disarmed && kprobe_disabled(old_p)) + arch_disarm_kprobe(old_p); + } } return 0; } @@ -803,11 +847,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num) } return ret; } +EXPORT_SYMBOL_GPL(register_kprobes); void __kprobes unregister_kprobe(struct kprobe *p) { unregister_kprobes(&p, 1); } +EXPORT_SYMBOL_GPL(unregister_kprobe); void __kprobes unregister_kprobes(struct kprobe **kps, int num) { @@ -826,6 +872,7 @@ void __kprobes unregister_kprobes(struct kprobe **kps, int num) if (kps[i]->addr) __unregister_kprobe_bottom(kps[i]); } +EXPORT_SYMBOL_GPL(unregister_kprobes); static struct notifier_block kprobe_exceptions_nb = { .notifier_call = kprobe_exceptions_notify, @@ -865,16 +912,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num) } return ret; } +EXPORT_SYMBOL_GPL(register_jprobes); int __kprobes register_jprobe(struct jprobe *jp) { return register_jprobes(&jp, 1); } +EXPORT_SYMBOL_GPL(register_jprobe); void __kprobes unregister_jprobe(struct jprobe *jp) { unregister_jprobes(&jp, 1); } +EXPORT_SYMBOL_GPL(unregister_jprobe); void __kprobes unregister_jprobes(struct jprobe **jps, int num) { @@ -894,6 +944,7 @@ void __kprobes unregister_jprobes(struct jprobe **jps, int num) __unregister_kprobe_bottom(&jps[i]->kp); } } +EXPORT_SYMBOL_GPL(unregister_jprobes); #ifdef CONFIG_KRETPROBES /* @@ -987,6 +1038,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) free_rp_inst(rp); return ret; } +EXPORT_SYMBOL_GPL(register_kretprobe); int __kprobes register_kretprobes(struct kretprobe **rps, int num) { @@ -1004,11 +1056,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num) } return ret; } +EXPORT_SYMBOL_GPL(register_kretprobes); void __kprobes unregister_kretprobe(struct kretprobe *rp) { unregister_kretprobes(&rp, 1); } +EXPORT_SYMBOL_GPL(unregister_kretprobe); void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) { @@ -1030,24 +1084,30 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) } } } +EXPORT_SYMBOL_GPL(unregister_kretprobes); #else /* CONFIG_KRETPROBES */ int __kprobes register_kretprobe(struct kretprobe *rp) { return -ENOSYS; } +EXPORT_SYMBOL_GPL(register_kretprobe); int __kprobes register_kretprobes(struct kretprobe **rps, int num) { return -ENOSYS; } +EXPORT_SYMBOL_GPL(register_kretprobes); + void __kprobes unregister_kretprobe(struct kretprobe *rp) { } +EXPORT_SYMBOL_GPL(unregister_kretprobe); void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) { } +EXPORT_SYMBOL_GPL(unregister_kretprobes); static int __kprobes pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) @@ -1061,6 +1121,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, static void __kprobes kill_kprobe(struct kprobe *p) { struct kprobe *kp; + p->flags |= KPROBE_FLAG_GONE; if (p->pre_handler == aggr_pre_handler) { /* @@ -1173,8 +1234,8 @@ static int __init init_kprobes(void) } } - /* By default, kprobes are enabled */ - kprobe_enabled = true; + /* By default, kprobes are armed */ + kprobes_all_disarmed = false; err = arch_init_kprobes(); if (!err) @@ -1202,12 +1263,18 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, else kprobe_type = "k"; if (sym) - seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type, - sym, offset, (modname ? modname : " "), - (kprobe_gone(p) ? "[GONE]" : "")); + seq_printf(pi, "%p %s %s+0x%x %s %s%s\n", + p->addr, kprobe_type, sym, offset, + (modname ? modname : " "), + (kprobe_gone(p) ? "[GONE]" : ""), + ((kprobe_disabled(p) && !kprobe_gone(p)) ? + "[DISABLED]" : "")); else - seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr, - (kprobe_gone(p) ? "[GONE]" : "")); + seq_printf(pi, "%p %s %p %s%s\n", + p->addr, kprobe_type, p->addr, + (kprobe_gone(p) ? "[GONE]" : ""), + ((kprobe_disabled(p) && !kprobe_gone(p)) ? + "[DISABLED]" : "")); } static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) @@ -1272,7 +1339,72 @@ static struct file_operations debugfs_kprobes_operations = { .release = seq_release, }; -static void __kprobes enable_all_kprobes(void) +/* Disable one kprobe */ +int __kprobes disable_kprobe(struct kprobe *kp) +{ + int ret = 0; + struct kprobe *p; + + mutex_lock(&kprobe_mutex); + + /* Check whether specified probe is valid. */ + p = __get_valid_kprobe(kp); + if (unlikely(p == NULL)) { + ret = -EINVAL; + goto out; + } + + /* If the probe is already disabled (or gone), just return */ + if (kprobe_disabled(kp)) + goto out; + + kp->flags |= KPROBE_FLAG_DISABLED; + if (p != kp) + /* When kp != p, p is always enabled. */ + try_to_disable_aggr_kprobe(p); + + if (!kprobes_all_disarmed && kprobe_disabled(p)) + arch_disarm_kprobe(p); +out: + mutex_unlock(&kprobe_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(disable_kprobe); + +/* Enable one kprobe */ +int __kprobes enable_kprobe(struct kprobe *kp) +{ + int ret = 0; + struct kprobe *p; + + mutex_lock(&kprobe_mutex); + + /* Check whether specified probe is valid. */ + p = __get_valid_kprobe(kp); + if (unlikely(p == NULL)) { + ret = -EINVAL; + goto out; + } + + if (kprobe_gone(kp)) { + /* This kprobe has gone, we couldn't enable it. */ + ret = -EINVAL; + goto out; + } + + if (!kprobes_all_disarmed && kprobe_disabled(p)) + arch_arm_kprobe(p); + + p->flags &= ~KPROBE_FLAG_DISABLED; + if (p != kp) + kp->flags &= ~KPROBE_FLAG_DISABLED; +out: + mutex_unlock(&kprobe_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(enable_kprobe); + +static void __kprobes arm_all_kprobes(void) { struct hlist_head *head; struct hlist_node *node; @@ -1281,20 +1413,20 @@ static void __kprobes enable_all_kprobes(void) mutex_lock(&kprobe_mutex); - /* If kprobes are already enabled, just return */ - if (kprobe_enabled) + /* If kprobes are armed, just return */ + if (!kprobes_all_disarmed) goto already_enabled; mutex_lock(&text_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) - if (!kprobe_gone(p)) + if (!kprobe_disabled(p)) arch_arm_kprobe(p); } mutex_unlock(&text_mutex); - kprobe_enabled = true; + kprobes_all_disarmed = false; printk(KERN_INFO "Kprobes globally enabled\n"); already_enabled: @@ -1302,7 +1434,7 @@ already_enabled: return; } -static void __kprobes disable_all_kprobes(void) +static void __kprobes disarm_all_kprobes(void) { struct hlist_head *head; struct hlist_node *node; @@ -1311,17 +1443,17 @@ static void __kprobes disable_all_kprobes(void) mutex_lock(&kprobe_mutex); - /* If kprobes are already disabled, just return */ - if (!kprobe_enabled) + /* If kprobes are already disarmed, just return */ + if (kprobes_all_disarmed) goto already_disabled; - kprobe_enabled = false; + kprobes_all_disarmed = true; printk(KERN_INFO "Kprobes globally disabled\n"); mutex_lock(&text_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) { - if (!arch_trampoline_kprobe(p) && !kprobe_gone(p)) + if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) arch_disarm_kprobe(p); } } @@ -1347,7 +1479,7 @@ static ssize_t read_enabled_file_bool(struct file *file, { char buf[3]; - if (kprobe_enabled) + if (!kprobes_all_disarmed) buf[0] = '1'; else buf[0] = '0'; @@ -1370,12 +1502,12 @@ static ssize_t write_enabled_file_bool(struct file *file, case 'y': case 'Y': case '1': - enable_all_kprobes(); + arm_all_kprobes(); break; case 'n': case 'N': case '0': - disable_all_kprobes(); + disarm_all_kprobes(); break; } @@ -1418,16 +1550,5 @@ late_initcall(debugfs_kprobe_init); module_init(init_kprobes); -EXPORT_SYMBOL_GPL(register_kprobe); -EXPORT_SYMBOL_GPL(unregister_kprobe); -EXPORT_SYMBOL_GPL(register_kprobes); -EXPORT_SYMBOL_GPL(unregister_kprobes); -EXPORT_SYMBOL_GPL(register_jprobe); -EXPORT_SYMBOL_GPL(unregister_jprobe); -EXPORT_SYMBOL_GPL(register_jprobes); -EXPORT_SYMBOL_GPL(unregister_jprobes); +/* defined in arch/.../kernel/kprobes.c */ EXPORT_SYMBOL_GPL(jprobe_return); -EXPORT_SYMBOL_GPL(register_kretprobe); -EXPORT_SYMBOL_GPL(unregister_kretprobe); -EXPORT_SYMBOL_GPL(register_kretprobes); -EXPORT_SYMBOL_GPL(unregister_kretprobes); diff --git a/kernel/module.c b/kernel/module.c index c268a771595..05f014efa32 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1952,9 +1952,6 @@ static noinline struct module *load_module(void __user *umod, if (strstarts(secstrings+sechdrs[i].sh_name, ".exit")) sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; #endif - /* Don't keep __versions around; it's just for loading. */ - if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0) - sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; } modindex = find_sec(hdr, sechdrs, secstrings, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b1a1b968f74..4286b62b34a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -101,6 +101,7 @@ static int __maybe_unused one = 1; static int __maybe_unused two = 2; static unsigned long one_ul = 1; static int one_hundred = 100; +static int one_thousand = 1000; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; @@ -1040,6 +1041,28 @@ static struct ctl_table vm_table[] = { .proc_handler = &proc_dointvec, }, { + .ctl_name = CTL_UNNUMBERED, + .procname = "nr_pdflush_threads_min", + .data = &nr_pdflush_threads_min, + .maxlen = sizeof nr_pdflush_threads_min, + .mode = 0644 /* read-write */, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &nr_pdflush_threads_max, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nr_pdflush_threads_max", + .data = &nr_pdflush_threads_max, + .maxlen = sizeof nr_pdflush_threads_max, + .mode = 0644 /* read-write */, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &nr_pdflush_threads_min, + .extra2 = &one_thousand, + }, + { .ctl_name = VM_SWAPPINESS, .procname = "swappiness", .data = &vm_swappiness, diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 947c5b3f90c..b32ff446c3f 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -327,10 +327,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, char *msg; struct blk_trace *bt; - if (count > BLK_TN_MAX_MSG) + if (count >= BLK_TN_MAX_MSG) return -EINVAL; - msg = kmalloc(count, GFP_KERNEL); + msg = kmalloc(count + 1, GFP_KERNEL); if (msg == NULL) return -ENOMEM; @@ -339,6 +339,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, return -EFAULT; } + msg[count] = '\0'; bt = filp->private_data; __trace_note_message(bt, "%s", msg); kfree(msg); @@ -642,7 +643,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (blk_pc_request(rq)) { what |= BLK_TC_ACT(BLK_TC_PC); __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, - sizeof(rq->cmd), rq->cmd); + rq->cmd_len, rq->cmd); } else { what |= BLK_TC_ACT(BLK_TC_FS); __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a0174a40c56..9d28476a985 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -30,6 +30,7 @@ #include <linux/percpu.h> #include <linux/splice.h> #include <linux/kdebug.h> +#include <linux/string.h> #include <linux/ctype.h> #include <linux/init.h> #include <linux/poll.h> @@ -147,8 +148,7 @@ static int __init set_ftrace_dump_on_oops(char *str) } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); -long -ns2usecs(cycle_t nsec) +unsigned long long ns2usecs(cycle_t nsec) { nsec += 500; do_div(nsec, 1000); @@ -1632,7 +1632,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter) return; cpumask_set_cpu(iter->cpu, iter->started); - trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); + + /* Don't print started cpu buffer for the first entry of the trace */ + if (iter->idx > 1) + trace_seq_printf(s, "##### CPU %u buffer started ####\n", + iter->cpu); } static enum print_line_t print_trace_fmt(struct trace_iterator *iter) @@ -1867,6 +1871,11 @@ __tracing_open(struct inode *inode, struct file *file) if (current_trace) *iter->trace = *current_trace; + if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) + goto fail; + + cpumask_clear(iter->started); + if (current_trace && current_trace->print_max) iter->tr = &max_tr; else @@ -1917,6 +1926,7 @@ __tracing_open(struct inode *inode, struct file *file) if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); } + free_cpumask_var(iter->started); fail: mutex_unlock(&trace_types_lock); kfree(iter->trace); @@ -1960,6 +1970,7 @@ static int tracing_release(struct inode *inode, struct file *file) seq_release(inode, file); mutex_destroy(&iter->mutex); + free_cpumask_var(iter->started); kfree(iter->trace); kfree(iter); return 0; @@ -2358,9 +2369,9 @@ static const char readme_msg[] = "# mkdir /debug\n" "# mount -t debugfs nodev /debug\n\n" "# cat /debug/tracing/available_tracers\n" - "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n" + "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n" "# cat /debug/tracing/current_tracer\n" - "none\n" + "nop\n" "# echo sched_switch > /debug/tracing/current_tracer\n" "# cat /debug/tracing/current_tracer\n" "sched_switch\n" diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cbc168f1e43..e685ac2b2ba 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -602,7 +602,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace, #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); -extern long ns2usecs(cycle_t nsec); +extern unsigned long long ns2usecs(cycle_t nsec); extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); extern int diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4d9952d3df5..07a22c33ebf 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -40,7 +40,7 @@ #undef TRACE_FIELD_ZERO_CHAR #define TRACE_FIELD_ZERO_CHAR(item) \ - ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ + ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \ "offset:%u;\tsize:0;\n", \ (unsigned int)offsetof(typeof(field), item)); \ if (!ret) \ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d72b9a63b24..64b54a59c55 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -423,7 +423,7 @@ int trace_print_lat_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); - ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" + ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]" " %ld.%03ldms (+%ld.%03ldms): ", comm, entry->pid, iter->cpu, entry->flags, entry->preempt_count, iter->idx, diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index de35f200abd..9117cea6f1a 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -62,6 +62,9 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) pc = preempt_count(); tracing_record_cmdline(current); + if (sched_stopped) + return; + local_irq_save(flags); cpu = raw_smp_processor_id(); data = ctx_trace->data[cpu]; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 3c5ad6b2ec8..5bc00e8f153 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -154,7 +154,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, if (unlikely(!tracer_enabled || next != wakeup_task)) goto out_unlock; - trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); + trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); /* @@ -257,6 +257,12 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) data = wakeup_trace->data[wakeup_cpu]; data->preempt_timestamp = ftrace_now(cpu); tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); + + /* + * We must be careful in using CALLER_ADDR2. But since wake_up + * is not called by an assembly function (where as schedule is) + * it should be safe to use it here. + */ trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); out_locked: |