summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/perf_event.c259
-rw-r--r--kernel/trace/ring_buffer.c21
-rw-r--r--kernel/trace/trace_event_perf.c21
-rw-r--r--kernel/trace/trace_events.c55
-rw-r--r--kernel/trace/trace_functions_graph.c2
-rw-r--r--kernel/watchdog.c36
6 files changed, 285 insertions, 109 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index db5b5606468..2d74f31220a 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1782,6 +1782,216 @@ static u64 perf_event_read(struct perf_event *event)
}
/*
+ * Callchain support
+ */
+
+struct callchain_cpus_entries {
+ struct rcu_head rcu_head;
+ struct perf_callchain_entry *cpu_entries[0];
+};
+
+static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
+static atomic_t nr_callchain_events;
+static DEFINE_MUTEX(callchain_mutex);
+struct callchain_cpus_entries *callchain_cpus_entries;
+
+
+__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+}
+
+__weak void perf_callchain_user(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+}
+
+static void release_callchain_buffers_rcu(struct rcu_head *head)
+{
+ struct callchain_cpus_entries *entries;
+ int cpu;
+
+ entries = container_of(head, struct callchain_cpus_entries, rcu_head);
+
+ for_each_possible_cpu(cpu)
+ kfree(entries->cpu_entries[cpu]);
+
+ kfree(entries);
+}
+
+static void release_callchain_buffers(void)
+{
+ struct callchain_cpus_entries *entries;
+
+ entries = callchain_cpus_entries;
+ rcu_assign_pointer(callchain_cpus_entries, NULL);
+ call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
+}
+
+static int alloc_callchain_buffers(void)
+{
+ int cpu;
+ int size;
+ struct callchain_cpus_entries *entries;
+
+ /*
+ * We can't use the percpu allocation API for data that can be
+ * accessed from NMI. Use a temporary manual per cpu allocation
+ * until that gets sorted out.
+ */
+ size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) *
+ num_possible_cpus();
+
+ entries = kzalloc(size, GFP_KERNEL);
+ if (!entries)
+ return -ENOMEM;
+
+ size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
+
+ for_each_possible_cpu(cpu) {
+ entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!entries->cpu_entries[cpu])
+ goto fail;
+ }
+
+ rcu_assign_pointer(callchain_cpus_entries, entries);
+
+ return 0;
+
+fail:
+ for_each_possible_cpu(cpu)
+ kfree(entries->cpu_entries[cpu]);
+ kfree(entries);
+
+ return -ENOMEM;
+}
+
+static int get_callchain_buffers(void)
+{
+ int err = 0;
+ int count;
+
+ mutex_lock(&callchain_mutex);
+
+ count = atomic_inc_return(&nr_callchain_events);
+ if (WARN_ON_ONCE(count < 1)) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ if (count > 1) {
+ /* If the allocation failed, give up */
+ if (!callchain_cpus_entries)
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ err = alloc_callchain_buffers();
+ if (err)
+ release_callchain_buffers();
+exit:
+ mutex_unlock(&callchain_mutex);
+
+ return err;
+}
+
+static void put_callchain_buffers(void)
+{
+ if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
+ release_callchain_buffers();
+ mutex_unlock(&callchain_mutex);
+ }
+}
+
+static int get_recursion_context(int *recursion)
+{
+ int rctx;
+
+ if (in_nmi())
+ rctx = 3;
+ else if (in_irq())
+ rctx = 2;
+ else if (in_softirq())
+ rctx = 1;
+ else
+ rctx = 0;
+
+ if (recursion[rctx])
+ return -1;
+
+ recursion[rctx]++;
+ barrier();
+
+ return rctx;
+}
+
+static inline void put_recursion_context(int *recursion, int rctx)
+{
+ barrier();
+ recursion[rctx]--;
+}
+
+static struct perf_callchain_entry *get_callchain_entry(int *rctx)
+{
+ int cpu;
+ struct callchain_cpus_entries *entries;
+
+ *rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
+ if (*rctx == -1)
+ return NULL;
+
+ entries = rcu_dereference(callchain_cpus_entries);
+ if (!entries)
+ return NULL;
+
+ cpu = smp_processor_id();
+
+ return &entries->cpu_entries[cpu][*rctx];
+}
+
+static void
+put_callchain_entry(int rctx)
+{
+ put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
+}
+
+static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+ int rctx;
+ struct perf_callchain_entry *entry;
+
+
+ entry = get_callchain_entry(&rctx);
+ if (rctx == -1)
+ return NULL;
+
+ if (!entry)
+ goto exit_put;
+
+ entry->nr = 0;
+
+ if (!user_mode(regs)) {
+ perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
+ perf_callchain_kernel(entry, regs);
+ if (current->mm)
+ regs = task_pt_regs(current);
+ else
+ regs = NULL;
+ }
+
+ if (regs) {
+ perf_callchain_store(entry, PERF_CONTEXT_USER);
+ perf_callchain_user(entry, regs);
+ }
+
+exit_put:
+ put_callchain_entry(rctx);
+
+ return entry;
+}
+
+/*
* Initialize the perf_event context in a task_struct:
*/
static void
@@ -1913,6 +2123,8 @@ static void free_event(struct perf_event *event)
atomic_dec(&nr_comm_events);
if (event->attr.task)
atomic_dec(&nr_task_events);
+ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+ put_callchain_buffers();
}
if (event->buffer) {
@@ -2956,16 +3168,6 @@ void perf_event_do_pending(void)
}
/*
- * Callchain support -- arch specific
- */
-
-__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
- return NULL;
-}
-
-
-/*
* We assume there is only KVM supporting the callbacks.
* Later on, we might change it to a list if there is
* another virtualization implementation supporting the callbacks.
@@ -3459,14 +3661,20 @@ static void perf_event_output(struct perf_event *event, int nmi,
struct perf_output_handle handle;
struct perf_event_header header;
+ /* protect the callchain buffers */
+ rcu_read_lock();
+
perf_prepare_sample(&header, data, event, regs);
if (perf_output_begin(&handle, event, header.size, nmi, 1))
- return;
+ goto exit;
perf_output_sample(&handle, &header, data, event);
perf_output_end(&handle);
+
+exit:
+ rcu_read_unlock();
}
/*
@@ -4222,32 +4430,16 @@ end:
int perf_swevent_get_recursion_context(void)
{
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
- int rctx;
- if (in_nmi())
- rctx = 3;
- else if (in_irq())
- rctx = 2;
- else if (in_softirq())
- rctx = 1;
- else
- rctx = 0;
-
- if (cpuctx->recursion[rctx])
- return -1;
-
- cpuctx->recursion[rctx]++;
- barrier();
-
- return rctx;
+ return get_recursion_context(cpuctx->recursion);
}
EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
void inline perf_swevent_put_recursion_context(int rctx)
{
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
- barrier();
- cpuctx->recursion[rctx]--;
+
+ put_recursion_context(cpuctx->recursion, rctx);
}
void __perf_sw_event(u32 event_id, u64 nr, int nmi,
@@ -4947,6 +5139,13 @@ done:
atomic_inc(&nr_comm_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
+ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
+ err = get_callchain_buffers();
+ if (err) {
+ free_event(event);
+ return ERR_PTR(err);
+ }
+ }
}
return event;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 19cccc3c302..ef27017caa5 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2606,6 +2606,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
}
EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
+/*
+ * The total entries in the ring buffer is the running counter
+ * of entries entered into the ring buffer, minus the sum of
+ * the entries read from the ring buffer and the number of
+ * entries that were overwritten.
+ */
+static inline unsigned long
+rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ return local_read(&cpu_buffer->entries) -
+ (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
+}
+
/**
* ring_buffer_entries_cpu - get the number of entries in a cpu buffer
* @buffer: The ring buffer
@@ -2614,16 +2627,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
- unsigned long ret;
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return 0;
cpu_buffer = buffer->buffers[cpu];
- ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
- - cpu_buffer->read;
- return ret;
+ return rb_num_of_entries(cpu_buffer);
}
EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
@@ -2684,8 +2694,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
/* if you care about this being correct, lock the buffer */
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
- entries += (local_read(&cpu_buffer->entries) -
- local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
+ entries += rb_num_of_entries(cpu_buffer);
}
return entries;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 31cc4cb0dbf..f3bbcd1c90c 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,7 +9,7 @@
#include <linux/kprobes.h>
#include "trace.h"
-static char *perf_trace_buf[4];
+static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
/*
* Force it to be aligned to unsigned long to avoid misaligned accesses
@@ -24,7 +24,7 @@ static int total_ref_count;
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
struct perf_event *p_event)
{
- struct hlist_head *list;
+ struct hlist_head __percpu *list;
int ret = -ENOMEM;
int cpu;
@@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
tp_event->perf_events = list;
if (!total_ref_count) {
- char *buf;
+ char __percpu *buf;
int i;
- for (i = 0; i < 4; i++) {
- buf = (char *)alloc_percpu(perf_trace_t);
+ for (i = 0; i < PERF_NR_CONTEXTS; i++) {
+ buf = (char __percpu *)alloc_percpu(perf_trace_t);
if (!buf)
goto fail;
@@ -65,7 +65,7 @@ fail:
if (!total_ref_count) {
int i;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < PERF_NR_CONTEXTS; i++) {
free_percpu(perf_trace_buf[i]);
perf_trace_buf[i] = NULL;
}
@@ -104,13 +104,14 @@ int perf_trace_init(struct perf_event *p_event)
int perf_trace_enable(struct perf_event *p_event)
{
struct ftrace_event_call *tp_event = p_event->tp_event;
+ struct hlist_head __percpu *pcpu_list;
struct hlist_head *list;
- list = tp_event->perf_events;
- if (WARN_ON_ONCE(!list))
+ pcpu_list = tp_event->perf_events;
+ if (WARN_ON_ONCE(!pcpu_list))
return -EINVAL;
- list = this_cpu_ptr(list);
+ list = this_cpu_ptr(pcpu_list);
hlist_add_head_rcu(&p_event->hlist_entry, list);
return 0;
@@ -142,7 +143,7 @@ void perf_trace_destroy(struct perf_event *p_event)
tp_event->perf_events = NULL;
if (!--total_ref_count) {
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < PERF_NR_CONTEXTS; i++) {
free_percpu(perf_trace_buf[i]);
perf_trace_buf[i] = NULL;
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4c758f14632..398c0e8b332 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -600,21 +600,29 @@ out:
enum {
FORMAT_HEADER = 1,
- FORMAT_PRINTFMT = 2,
+ FORMAT_FIELD_SEPERATOR = 2,
+ FORMAT_PRINTFMT = 3,
};
static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
struct ftrace_event_call *call = m->private;
struct ftrace_event_field *field;
- struct list_head *head;
+ struct list_head *common_head = &ftrace_common_fields;
+ struct list_head *head = trace_get_fields(call);
(*pos)++;
switch ((unsigned long)v) {
case FORMAT_HEADER:
- head = &ftrace_common_fields;
+ if (unlikely(list_empty(common_head)))
+ return NULL;
+
+ field = list_entry(common_head->prev,
+ struct ftrace_event_field, link);
+ return field;
+ case FORMAT_FIELD_SEPERATOR:
if (unlikely(list_empty(head)))
return NULL;
@@ -626,31 +634,10 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos)
return NULL;
}
- head = trace_get_fields(call);
-
- /*
- * To separate common fields from event fields, the
- * LSB is set on the first event field. Clear it in case.
- */
- v = (void *)((unsigned long)v & ~1L);
-
field = v;
- /*
- * If this is a common field, and at the end of the list, then
- * continue with main list.
- */
- if (field->link.prev == &ftrace_common_fields) {
- if (unlikely(list_empty(head)))
- return NULL;
- field = list_entry(head->prev, struct ftrace_event_field, link);
- /* Set the LSB to notify f_show to print an extra newline */
- field = (struct ftrace_event_field *)
- ((unsigned long)field | 1);
- return field;
- }
-
- /* If we are done tell f_show to print the format */
- if (field->link.prev == head)
+ if (field->link.prev == common_head)
+ return (void *)FORMAT_FIELD_SEPERATOR;
+ else if (field->link.prev == head)
return (void *)FORMAT_PRINTFMT;
field = list_entry(field->link.prev, struct ftrace_event_field, link);
@@ -688,22 +675,16 @@ static int f_show(struct seq_file *m, void *v)
seq_printf(m, "format:\n");
return 0;
+ case FORMAT_FIELD_SEPERATOR:
+ seq_putc(m, '\n');
+ return 0;
+
case FORMAT_PRINTFMT:
seq_printf(m, "\nprint fmt: %s\n",
call->print_fmt);
return 0;
}
- /*
- * To separate common fields from event fields, the
- * LSB is set on the first event field. Clear it and
- * print a newline if it is set.
- */
- if ((unsigned long)v & 1) {
- seq_putc(m, '\n');
- v = (void *)((unsigned long)v & ~1L);
- }
-
field = v;
/*
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 6f233698518..c93bcb24863 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -23,7 +23,7 @@ struct fgraph_cpu_data {
};
struct fgraph_data {
- struct fgraph_cpu_data *cpu_data;
+ struct fgraph_cpu_data __percpu *cpu_data;
/* Place to preserve last processed entry. */
struct ftrace_graph_ent_entry ent;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7f9c3c52ecc..fa71aebda4f 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -43,7 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
-static int __read_mostly did_panic;
static int __initdata no_watchdog;
@@ -187,18 +186,6 @@ static int is_softlockup(unsigned long touch_ts)
return 0;
}
-static int
-watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr)
-{
- did_panic = 1;
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block panic_block = {
- .notifier_call = watchdog_panic,
-};
-
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
@@ -378,7 +365,7 @@ static int watchdog_nmi_enable(int cpu)
}
printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
- return -1;
+ return PTR_ERR(event);
/* success path */
out_save:
@@ -422,17 +409,19 @@ static int watchdog_prepare_cpu(int cpu)
static int watchdog_enable(int cpu)
{
struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
+ int err;
/* enable the perf event */
- if (watchdog_nmi_enable(cpu) != 0)
- return -1;
+ err = watchdog_nmi_enable(cpu);
+ if (err)
+ return err;
/* create the watchdog thread */
if (!p) {
p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
if (IS_ERR(p)) {
printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
- return -1;
+ return PTR_ERR(p);
}
kthread_bind(p, cpu);
per_cpu(watchdog_touch_ts, cpu) = 0;
@@ -526,17 +515,16 @@ static int __cpuinit
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
int hotcpu = (unsigned long)hcpu;
+ int err = 0;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- if (watchdog_prepare_cpu(hotcpu))
- return NOTIFY_BAD;
+ err = watchdog_prepare_cpu(hotcpu);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
- if (watchdog_enable(hotcpu))
- return NOTIFY_BAD;
+ err = watchdog_enable(hotcpu);
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
@@ -549,7 +537,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
#endif /* CONFIG_HOTPLUG_CPU */
}
- return NOTIFY_OK;
+ return notifier_from_errno(err);
}
static struct notifier_block __cpuinitdata cpu_nfb = {
@@ -565,13 +553,11 @@ static int __init spawn_watchdog_task(void)
return 0;
err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
- WARN_ON(err == NOTIFY_BAD);
+ WARN_ON(notifier_to_errno(err));
cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
register_cpu_notifier(&cpu_nfb);
- atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
-
return 0;
}
early_initcall(spawn_watchdog_task);