diff options
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r-- | kernel/trace/ring_buffer.c | 114 |
1 files changed, 85 insertions, 29 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index cedf4e26828..6781e9aab2c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -16,6 +16,8 @@ #include <linux/list.h> #include <linux/fs.h> +#include "trace.h" + /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 @@ -152,7 +154,7 @@ static inline int test_time_stamp(u64 delta) struct ring_buffer_per_cpu { int cpu; struct ring_buffer *buffer; - spinlock_t lock; + raw_spinlock_t lock; struct lock_class_key lock_key; struct list_head pages; struct buffer_page *head_page; /* read from head */ @@ -289,7 +291,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) cpu_buffer->cpu = cpu; cpu_buffer->buffer = buffer; - spin_lock_init(&cpu_buffer->lock); + cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; INIT_LIST_HEAD(&cpu_buffer->pages); page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), @@ -852,7 +854,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, if (write > BUF_PAGE_SIZE) { struct buffer_page *next_page = tail_page; - spin_lock_irqsave(&cpu_buffer->lock, flags); + local_irq_save(flags); + __raw_spin_lock(&cpu_buffer->lock); rb_inc_page(cpu_buffer, &next_page); @@ -928,7 +931,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, rb_set_commit_to_write(cpu_buffer); } - spin_unlock_irqrestore(&cpu_buffer->lock, flags); + __raw_spin_unlock(&cpu_buffer->lock); + local_irq_restore(flags); /* fail and let the caller try again */ return ERR_PTR(-EAGAIN); @@ -951,7 +955,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, return event; out_unlock: - spin_unlock_irqrestore(&cpu_buffer->lock, flags); + __raw_spin_unlock(&cpu_buffer->lock); + local_irq_restore(flags); return NULL; } @@ -1022,8 +1027,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event; u64 ts, delta; int commit = 0; + int nr_loops = 0; again: + /* + * We allow for interrupts to reenter here and do a trace. + * If one does, it will cause this original code to loop + * back here. Even with heavy interrupts happening, this + * should only happen a few times in a row. If this happens + * 1000 times in a row, there must be either an interrupt + * storm or we have something buggy. + * Bail! + */ + if (unlikely(++nr_loops > 1000)) { + RB_WARN_ON(cpu_buffer, 1); + return NULL; + } + ts = ring_buffer_time_stamp(cpu_buffer->cpu); /* @@ -1122,8 +1142,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, return NULL; /* If we are tracing schedule, we don't want to recurse */ - resched = need_resched(); - preempt_disable_notrace(); + resched = ftrace_preempt_disable(); cpu = raw_smp_processor_id(); @@ -1154,10 +1173,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, return event; out: - if (resched) - preempt_enable_notrace(); - else - preempt_enable_notrace(); + ftrace_preempt_enable(resched); return NULL; } @@ -1199,12 +1215,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, /* * Only the last preempt count needs to restore preemption. */ - if (preempt_count() == 1) { - if (per_cpu(rb_need_resched, cpu)) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); - } else + if (preempt_count() == 1) + ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); + else preempt_enable_no_resched_notrace(); return 0; @@ -1237,8 +1250,7 @@ int ring_buffer_write(struct ring_buffer *buffer, if (atomic_read(&buffer->record_disabled)) return -EBUSY; - resched = need_resched(); - preempt_disable_notrace(); + resched = ftrace_preempt_disable(); cpu = raw_smp_processor_id(); @@ -1264,10 +1276,7 @@ int ring_buffer_write(struct ring_buffer *buffer, ret = 0; out: - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + ftrace_preempt_enable(resched); return ret; } @@ -1532,10 +1541,24 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *reader = NULL; unsigned long flags; + int nr_loops = 0; - spin_lock_irqsave(&cpu_buffer->lock, flags); + local_irq_save(flags); + __raw_spin_lock(&cpu_buffer->lock); again: + /* + * This should normally only loop twice. But because the + * start of the reader inserts an empty page, it causes + * a case where we will loop three times. There should be no + * reason to loop four times (that I know of). + */ + if (unlikely(++nr_loops > 3)) { + RB_WARN_ON(cpu_buffer, 1); + reader = NULL; + goto out; + } + reader = cpu_buffer->reader_page; /* If there's more to read, return this page */ @@ -1583,7 +1606,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) goto again; out: - spin_unlock_irqrestore(&cpu_buffer->lock, flags); + __raw_spin_unlock(&cpu_buffer->lock); + local_irq_restore(flags); return reader; } @@ -1665,6 +1689,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; struct buffer_page *reader; + int nr_loops = 0; if (!cpu_isset(cpu, buffer->cpumask)) return NULL; @@ -1672,6 +1697,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) cpu_buffer = buffer->buffers[cpu]; again: + /* + * We repeat when a timestamp is encountered. It is possible + * to get multiple timestamps from an interrupt entering just + * as one timestamp is about to be written. The max times + * that this can happen is the number of nested interrupts we + * can have. Nesting 10 deep of interrupts is clearly + * an anomaly. + */ + if (unlikely(++nr_loops > 10)) { + RB_WARN_ON(cpu_buffer, 1); + return NULL; + } + reader = rb_get_reader_page(cpu_buffer); if (!reader) return NULL; @@ -1722,6 +1760,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) struct ring_buffer *buffer; struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; + int nr_loops = 0; if (ring_buffer_iter_empty(iter)) return NULL; @@ -1730,6 +1769,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) buffer = cpu_buffer->buffer; again: + /* + * We repeat when a timestamp is encountered. It is possible + * to get multiple timestamps from an interrupt entering just + * as one timestamp is about to be written. The max times + * that this can happen is the number of nested interrupts we + * can have. Nesting 10 deep of interrupts is clearly + * an anomaly. + */ + if (unlikely(++nr_loops > 10)) { + RB_WARN_ON(cpu_buffer, 1); + return NULL; + } + if (rb_per_cpu_empty(cpu_buffer)) return NULL; @@ -1824,9 +1876,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu) atomic_inc(&cpu_buffer->record_disabled); synchronize_sched(); - spin_lock_irqsave(&cpu_buffer->lock, flags); + local_irq_save(flags); + __raw_spin_lock(&cpu_buffer->lock); ring_buffer_iter_reset(iter); - spin_unlock_irqrestore(&cpu_buffer->lock, flags); + __raw_spin_unlock(&cpu_buffer->lock); + local_irq_restore(flags); return iter; } @@ -1912,11 +1966,13 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) if (!cpu_isset(cpu, buffer->cpumask)) return; - spin_lock_irqsave(&cpu_buffer->lock, flags); + local_irq_save(flags); + __raw_spin_lock(&cpu_buffer->lock); rb_reset_cpu(cpu_buffer); - spin_unlock_irqrestore(&cpu_buffer->lock, flags); + __raw_spin_unlock(&cpu_buffer->lock); + local_irq_restore(flags); } /** |