diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/perf_event.c | 259 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 21 | ||||
-rw-r--r-- | kernel/trace/trace_event_perf.c | 21 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 55 | ||||
-rw-r--r-- | kernel/trace/trace_functions_graph.c | 2 | ||||
-rw-r--r-- | kernel/watchdog.c | 36 |
6 files changed, 285 insertions, 109 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index db5b5606468..2d74f31220a 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1782,6 +1782,216 @@ static u64 perf_event_read(struct perf_event *event) } /* + * Callchain support + */ + +struct callchain_cpus_entries { + struct rcu_head rcu_head; + struct perf_callchain_entry *cpu_entries[0]; +}; + +static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); +static atomic_t nr_callchain_events; +static DEFINE_MUTEX(callchain_mutex); +struct callchain_cpus_entries *callchain_cpus_entries; + + +__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +__weak void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +static void release_callchain_buffers_rcu(struct rcu_head *head) +{ + struct callchain_cpus_entries *entries; + int cpu; + + entries = container_of(head, struct callchain_cpus_entries, rcu_head); + + for_each_possible_cpu(cpu) + kfree(entries->cpu_entries[cpu]); + + kfree(entries); +} + +static void release_callchain_buffers(void) +{ + struct callchain_cpus_entries *entries; + + entries = callchain_cpus_entries; + rcu_assign_pointer(callchain_cpus_entries, NULL); + call_rcu(&entries->rcu_head, release_callchain_buffers_rcu); +} + +static int alloc_callchain_buffers(void) +{ + int cpu; + int size; + struct callchain_cpus_entries *entries; + + /* + * We can't use the percpu allocation API for data that can be + * accessed from NMI. Use a temporary manual per cpu allocation + * until that gets sorted out. + */ + size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * + num_possible_cpus(); + + entries = kzalloc(size, GFP_KERNEL); + if (!entries) + return -ENOMEM; + + size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; + + for_each_possible_cpu(cpu) { + entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, + cpu_to_node(cpu)); + if (!entries->cpu_entries[cpu]) + goto fail; + } + + rcu_assign_pointer(callchain_cpus_entries, entries); + + return 0; + +fail: + for_each_possible_cpu(cpu) + kfree(entries->cpu_entries[cpu]); + kfree(entries); + + return -ENOMEM; +} + +static int get_callchain_buffers(void) +{ + int err = 0; + int count; + + mutex_lock(&callchain_mutex); + + count = atomic_inc_return(&nr_callchain_events); + if (WARN_ON_ONCE(count < 1)) { + err = -EINVAL; + goto exit; + } + + if (count > 1) { + /* If the allocation failed, give up */ + if (!callchain_cpus_entries) + err = -ENOMEM; + goto exit; + } + + err = alloc_callchain_buffers(); + if (err) + release_callchain_buffers(); +exit: + mutex_unlock(&callchain_mutex); + + return err; +} + +static void put_callchain_buffers(void) +{ + if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) { + release_callchain_buffers(); + mutex_unlock(&callchain_mutex); + } +} + +static int get_recursion_context(int *recursion) +{ + int rctx; + + if (in_nmi()) + rctx = 3; + else if (in_irq()) + rctx = 2; + else if (in_softirq()) + rctx = 1; + else + rctx = 0; + + if (recursion[rctx]) + return -1; + + recursion[rctx]++; + barrier(); + + return rctx; +} + +static inline void put_recursion_context(int *recursion, int rctx) +{ + barrier(); + recursion[rctx]--; +} + +static struct perf_callchain_entry *get_callchain_entry(int *rctx) +{ + int cpu; + struct callchain_cpus_entries *entries; + + *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); + if (*rctx == -1) + return NULL; + + entries = rcu_dereference(callchain_cpus_entries); + if (!entries) + return NULL; + + cpu = smp_processor_id(); + + return &entries->cpu_entries[cpu][*rctx]; +} + +static void +put_callchain_entry(int rctx) +{ + put_recursion_context(__get_cpu_var(callchain_recursion), rctx); +} + +static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + int rctx; + struct perf_callchain_entry *entry; + + + entry = get_callchain_entry(&rctx); + if (rctx == -1) + return NULL; + + if (!entry) + goto exit_put; + + entry->nr = 0; + + if (!user_mode(regs)) { + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_kernel(entry, regs); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_user(entry, regs); + } + +exit_put: + put_callchain_entry(rctx); + + return entry; +} + +/* * Initialize the perf_event context in a task_struct: */ static void @@ -1913,6 +2123,8 @@ static void free_event(struct perf_event *event) atomic_dec(&nr_comm_events); if (event->attr.task) atomic_dec(&nr_task_events); + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + put_callchain_buffers(); } if (event->buffer) { @@ -2956,16 +3168,6 @@ void perf_event_do_pending(void) } /* - * Callchain support -- arch specific - */ - -__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - return NULL; -} - - -/* * We assume there is only KVM supporting the callbacks. * Later on, we might change it to a list if there is * another virtualization implementation supporting the callbacks. @@ -3459,14 +3661,20 @@ static void perf_event_output(struct perf_event *event, int nmi, struct perf_output_handle handle; struct perf_event_header header; + /* protect the callchain buffers */ + rcu_read_lock(); + perf_prepare_sample(&header, data, event, regs); if (perf_output_begin(&handle, event, header.size, nmi, 1)) - return; + goto exit; perf_output_sample(&handle, &header, data, event); perf_output_end(&handle); + +exit: + rcu_read_unlock(); } /* @@ -4222,32 +4430,16 @@ end: int perf_swevent_get_recursion_context(void) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - int rctx; - if (in_nmi()) - rctx = 3; - else if (in_irq()) - rctx = 2; - else if (in_softirq()) - rctx = 1; - else - rctx = 0; - - if (cpuctx->recursion[rctx]) - return -1; - - cpuctx->recursion[rctx]++; - barrier(); - - return rctx; + return get_recursion_context(cpuctx->recursion); } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); void inline perf_swevent_put_recursion_context(int rctx) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - barrier(); - cpuctx->recursion[rctx]--; + + put_recursion_context(cpuctx->recursion, rctx); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, @@ -4947,6 +5139,13 @@ done: atomic_inc(&nr_comm_events); if (event->attr.task) atomic_inc(&nr_task_events); + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { + err = get_callchain_buffers(); + if (err) { + free_event(event); + return ERR_PTR(err); + } + } } return event; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 19cccc3c302..ef27017caa5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2606,6 +2606,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); +/* + * The total entries in the ring buffer is the running counter + * of entries entered into the ring buffer, minus the sum of + * the entries read from the ring buffer and the number of + * entries that were overwritten. + */ +static inline unsigned long +rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) +{ + return local_read(&cpu_buffer->entries) - + (local_read(&cpu_buffer->overrun) + cpu_buffer->read); +} + /** * ring_buffer_entries_cpu - get the number of entries in a cpu buffer * @buffer: The ring buffer @@ -2614,16 +2627,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - unsigned long ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; cpu_buffer = buffer->buffers[cpu]; - ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun)) - - cpu_buffer->read; - return ret; + return rb_num_of_entries(cpu_buffer); } EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); @@ -2684,8 +2694,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) /* if you care about this being correct, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; - entries += (local_read(&cpu_buffer->entries) - - local_read(&cpu_buffer->overrun)) - cpu_buffer->read; + entries += rb_num_of_entries(cpu_buffer); } return entries; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 31cc4cb0dbf..f3bbcd1c90c 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,7 +9,7 @@ #include <linux/kprobes.h> #include "trace.h" -static char *perf_trace_buf[4]; +static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses @@ -24,7 +24,7 @@ static int total_ref_count; static int perf_trace_event_init(struct ftrace_event_call *tp_event, struct perf_event *p_event) { - struct hlist_head *list; + struct hlist_head __percpu *list; int ret = -ENOMEM; int cpu; @@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, tp_event->perf_events = list; if (!total_ref_count) { - char *buf; + char __percpu *buf; int i; - for (i = 0; i < 4; i++) { - buf = (char *)alloc_percpu(perf_trace_t); + for (i = 0; i < PERF_NR_CONTEXTS; i++) { + buf = (char __percpu *)alloc_percpu(perf_trace_t); if (!buf) goto fail; @@ -65,7 +65,7 @@ fail: if (!total_ref_count) { int i; - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } @@ -104,13 +104,14 @@ int perf_trace_init(struct perf_event *p_event) int perf_trace_enable(struct perf_event *p_event) { struct ftrace_event_call *tp_event = p_event->tp_event; + struct hlist_head __percpu *pcpu_list; struct hlist_head *list; - list = tp_event->perf_events; - if (WARN_ON_ONCE(!list)) + pcpu_list = tp_event->perf_events; + if (WARN_ON_ONCE(!pcpu_list)) return -EINVAL; - list = this_cpu_ptr(list); + list = this_cpu_ptr(pcpu_list); hlist_add_head_rcu(&p_event->hlist_entry, list); return 0; @@ -142,7 +143,7 @@ void perf_trace_destroy(struct perf_event *p_event) tp_event->perf_events = NULL; if (!--total_ref_count) { - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 4c758f14632..398c0e8b332 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -600,21 +600,29 @@ out: enum { FORMAT_HEADER = 1, - FORMAT_PRINTFMT = 2, + FORMAT_FIELD_SEPERATOR = 2, + FORMAT_PRINTFMT = 3, }; static void *f_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_event_call *call = m->private; struct ftrace_event_field *field; - struct list_head *head; + struct list_head *common_head = &ftrace_common_fields; + struct list_head *head = trace_get_fields(call); (*pos)++; switch ((unsigned long)v) { case FORMAT_HEADER: - head = &ftrace_common_fields; + if (unlikely(list_empty(common_head))) + return NULL; + + field = list_entry(common_head->prev, + struct ftrace_event_field, link); + return field; + case FORMAT_FIELD_SEPERATOR: if (unlikely(list_empty(head))) return NULL; @@ -626,31 +634,10 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos) return NULL; } - head = trace_get_fields(call); - - /* - * To separate common fields from event fields, the - * LSB is set on the first event field. Clear it in case. - */ - v = (void *)((unsigned long)v & ~1L); - field = v; - /* - * If this is a common field, and at the end of the list, then - * continue with main list. - */ - if (field->link.prev == &ftrace_common_fields) { - if (unlikely(list_empty(head))) - return NULL; - field = list_entry(head->prev, struct ftrace_event_field, link); - /* Set the LSB to notify f_show to print an extra newline */ - field = (struct ftrace_event_field *) - ((unsigned long)field | 1); - return field; - } - - /* If we are done tell f_show to print the format */ - if (field->link.prev == head) + if (field->link.prev == common_head) + return (void *)FORMAT_FIELD_SEPERATOR; + else if (field->link.prev == head) return (void *)FORMAT_PRINTFMT; field = list_entry(field->link.prev, struct ftrace_event_field, link); @@ -688,22 +675,16 @@ static int f_show(struct seq_file *m, void *v) seq_printf(m, "format:\n"); return 0; + case FORMAT_FIELD_SEPERATOR: + seq_putc(m, '\n'); + return 0; + case FORMAT_PRINTFMT: seq_printf(m, "\nprint fmt: %s\n", call->print_fmt); return 0; } - /* - * To separate common fields from event fields, the - * LSB is set on the first event field. Clear it and - * print a newline if it is set. - */ - if ((unsigned long)v & 1) { - seq_putc(m, '\n'); - v = (void *)((unsigned long)v & ~1L); - } - field = v; /* diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 6f233698518..c93bcb24863 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -23,7 +23,7 @@ struct fgraph_cpu_data { }; struct fgraph_data { - struct fgraph_cpu_data *cpu_data; + struct fgraph_cpu_data __percpu *cpu_data; /* Place to preserve last processed entry. */ struct ftrace_graph_ent_entry ent; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7f9c3c52ecc..fa71aebda4f 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -43,7 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif -static int __read_mostly did_panic; static int __initdata no_watchdog; @@ -187,18 +186,6 @@ static int is_softlockup(unsigned long touch_ts) return 0; } -static int -watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr) -{ - did_panic = 1; - - return NOTIFY_DONE; -} - -static struct notifier_block panic_block = { - .notifier_call = watchdog_panic, -}; - #ifdef CONFIG_HARDLOCKUP_DETECTOR static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, @@ -378,7 +365,7 @@ static int watchdog_nmi_enable(int cpu) } printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); - return -1; + return PTR_ERR(event); /* success path */ out_save: @@ -422,17 +409,19 @@ static int watchdog_prepare_cpu(int cpu) static int watchdog_enable(int cpu) { struct task_struct *p = per_cpu(softlockup_watchdog, cpu); + int err; /* enable the perf event */ - if (watchdog_nmi_enable(cpu) != 0) - return -1; + err = watchdog_nmi_enable(cpu); + if (err) + return err; /* create the watchdog thread */ if (!p) { p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); if (IS_ERR(p)) { printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); - return -1; + return PTR_ERR(p); } kthread_bind(p, cpu); per_cpu(watchdog_touch_ts, cpu) = 0; @@ -526,17 +515,16 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { int hotcpu = (unsigned long)hcpu; + int err = 0; switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - if (watchdog_prepare_cpu(hotcpu)) - return NOTIFY_BAD; + err = watchdog_prepare_cpu(hotcpu); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: - if (watchdog_enable(hotcpu)) - return NOTIFY_BAD; + err = watchdog_enable(hotcpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: @@ -549,7 +537,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; #endif /* CONFIG_HOTPLUG_CPU */ } - return NOTIFY_OK; + return notifier_from_errno(err); } static struct notifier_block __cpuinitdata cpu_nfb = { @@ -565,13 +553,11 @@ static int __init spawn_watchdog_task(void) return 0; err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); - WARN_ON(err == NOTIFY_BAD); + WARN_ON(notifier_to_errno(err)); cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); register_cpu_notifier(&cpu_nfb); - atomic_notifier_chain_register(&panic_notifier_list, &panic_block); - return 0; } early_initcall(spawn_watchdog_task); |