summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/kernel/ftrace.c 15
-rw-r--r-- include/asm-generic/vmlinux.lds.h 2
-rw-r--r-- include/linux/ftrace.h 5
-rw-r--r-- kernel/trace/Kconfig 1
-rw-r--r-- kernel/trace/ftrace.c 198
-rw-r--r-- kernel/trace/ring_buffer.c 394
-rw-r--r-- kernel/trace/trace.c 49
7 files changed, 446 insertions, 218 deletions
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 4243e8bbdcb..32ff36596ab 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -435,7 +435,7 @@ static void run_sync(void)
local_irq_disable();
}
-static void ftrace_replace_code(int enable)
+void ftrace_replace_code(int enable)
{
struct ftrace_rec_iter *iter;
struct dyn_ftrace *rec;
@@ -493,18 +493,7 @@ void arch_ftrace_update_code(int command)
{
modifying_ftrace_code++;
- if (command & FTRACE_UPDATE_CALLS)
- ftrace_replace_code(1);
- else if (command & FTRACE_DISABLE_CALLS)
- ftrace_replace_code(0);
-
- if (command & FTRACE_UPDATE_TRACE_FUNC)
- ftrace_update_ftrace_func(ftrace_trace_function);
-
- if (command & FTRACE_START_FUNC_RET)
- ftrace_enable_ftrace_graph_caller();
- else if (command & FTRACE_STOP_FUNC_RET)
- ftrace_disable_ftrace_graph_caller();
+ ftrace_modify_all_code(command);
modifying_ftrace_code--;
}
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8aeadf6b553..4e2e1cc505a 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -486,8 +486,8 @@
CPU_DISCARD(init.data) \
MEM_DISCARD(init.data) \
KERNEL_CTORS() \
- *(.init.rodata) \
MCOUNT_REC() \
+ *(.init.rodata) \
FTRACE_EVENTS() \
TRACE_SYSCALLS() \
DEV_DISCARD(init.rodata) \
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index d32cc5e4b0c..55e6d63d46d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -295,7 +295,7 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
int ftrace_update_record(struct dyn_ftrace *rec, int enable);
int ftrace_test_record(struct dyn_ftrace *rec, int enable);
void ftrace_run_stop_machine(int command);
-int ftrace_location(unsigned long ip);
+unsigned long ftrace_location(unsigned long ip);
extern ftrace_func_t ftrace_trace_function;
@@ -314,11 +314,14 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
/* defined in arch */
extern int ftrace_ip_converted(unsigned long ip);
extern int ftrace_dyn_arch_init(void *data);
+extern void ftrace_replace_code(int enable);
extern int ftrace_update_ftrace_func(ftrace_func_t func);
extern void ftrace_caller(void);
extern void ftrace_call(void);
extern void mcount_call(void);
+void ftrace_modify_all_code(int command);
+
#ifndef FTRACE_ADDR
#define FTRACE_ADDR ((unsigned long)ftrace_caller)
#endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index a1d2849f247..d81a1a53299 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -141,7 +141,6 @@ if FTRACE
config FUNCTION_TRACER
bool "Kernel Function Tracer"
depends on HAVE_FUNCTION_TRACER
- select FRAME_POINTER if !ARM_UNWIND && !PPC && !S390 && !MICROBLAZE
select KALLSYMS
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index cf81f27ce6c..a008663d86c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1383,44 +1383,73 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
static int ftrace_cmp_recs(const void *a, const void *b)
{
- const struct dyn_ftrace *reca = a;
- const struct dyn_ftrace *recb = b;
+ const struct dyn_ftrace *key = a;
+ const struct dyn_ftrace *rec = b;
- if (reca->ip > recb->ip)
- return 1;
- if (reca->ip < recb->ip)
+ if (key->flags < rec->ip)
return -1;
+ if (key->ip >= rec->ip + MCOUNT_INSN_SIZE)
+ return 1;
return 0;
}
-/**
- * ftrace_location - return true if the ip giving is a traced location
- * @ip: the instruction pointer to check
- *
- * Returns 1 if @ip given is a pointer to a ftrace location.
- * That is, the instruction that is either a NOP or call to
- * the function tracer. It checks the ftrace internal tables to
- * determine if the address belongs or not.
- */
-int ftrace_location(unsigned long ip)
+static unsigned long ftrace_location_range(unsigned long start, unsigned long end)
{
struct ftrace_page *pg;
struct dyn_ftrace *rec;
struct dyn_ftrace key;
- key.ip = ip;
+ key.ip = start;
+ key.flags = end; /* overload flags, as it is unsigned long */
for (pg = ftrace_pages_start; pg; pg = pg->next) {
+ if (end < pg->records[0].ip ||
+ start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
+ continue;
rec = bsearch(&key, pg->records, pg->index,
sizeof(struct dyn_ftrace),
ftrace_cmp_recs);
if (rec)
- return 1;
+ return rec->ip;
}
return 0;
}
+/**
+ * ftrace_location - return the record's ip if the given ip is a traced location
+ * @ip: the instruction pointer to check
+ *
+ * Returns rec->ip if the given @ip lies within an ftrace location, or 0
+ * if it does not. That is, within the instruction that is either a NOP or
+ * a call to the function tracer. It checks the ftrace internal tables to
+ * determine whether the address belongs to one.
+ */
+unsigned long ftrace_location(unsigned long ip)
+{
+ return ftrace_location_range(ip, ip);
+}
+
+/**
+ * ftrace_text_reserved - return true if range contains an ftrace location
+ * @start: start of range to search
+ * @end: end of range to search (inclusive). @end points to the last byte to check.
+ *
+ * Returns 1 if the range @start to @end contains an ftrace location, 0 otherwise.
+ * That is, the instruction that is either a NOP or call to
+ * the function tracer. It checks the ftrace internal tables to
+ * determine if the address belongs or not.
+ */
+int ftrace_text_reserved(void *start, void *end)
+{
+ unsigned long ret;
+
+ ret = ftrace_location_range((unsigned long)start,
+ (unsigned long)end);
+
+ return (int)!!ret;
+}
+
static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
int filter_hash,
bool inc)
@@ -1520,35 +1549,6 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
__ftrace_hash_rec_update(ops, filter_hash, 1);
}
-static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
-{
- if (ftrace_pages->index == ftrace_pages->size) {
- /* We should have allocated enough */
- if (WARN_ON(!ftrace_pages->next))
- return NULL;
- ftrace_pages = ftrace_pages->next;
- }
-
- return &ftrace_pages->records[ftrace_pages->index++];
-}
-
-static struct dyn_ftrace *
-ftrace_record_ip(unsigned long ip)
-{
- struct dyn_ftrace *rec;
-
- if (ftrace_disabled)
- return NULL;
-
- rec = ftrace_alloc_dyn_node(ip);
- if (!rec)
- return NULL;
-
- rec->ip = ip;
-
- return rec;
-}
-
static void print_ip_ins(const char *fmt, unsigned char *p)
{
int i;
@@ -1598,21 +1598,6 @@ void ftrace_bug(int failed, unsigned long ip)
}
}
-
-/* Return 1 if the address range is reserved for ftrace */
-int ftrace_text_reserved(void *start, void *end)
-{
- struct dyn_ftrace *rec;
- struct ftrace_page *pg;
-
- do_for_each_ftrace_rec(pg, rec) {
- if (rec->ip <= (unsigned long)end &&
- rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
- return 1;
- } while_for_each_ftrace_rec();
- return 0;
-}
-
static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
{
unsigned long flag = 0UL;
@@ -1698,7 +1683,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
return -1; /* unknow ftrace bug */
}
-static void ftrace_replace_code(int update)
+void __weak ftrace_replace_code(int enable)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
@@ -1708,7 +1693,7 @@ static void ftrace_replace_code(int update)
return;
do_for_each_ftrace_rec(pg, rec) {
- failed = __ftrace_replace_code(rec, update);
+ failed = __ftrace_replace_code(rec, enable);
if (failed) {
ftrace_bug(failed, rec->ip);
/* Stop processing */
@@ -1826,22 +1811,27 @@ int __weak ftrace_arch_code_modify_post_process(void)
return 0;
}
-static int __ftrace_modify_code(void *data)
+void ftrace_modify_all_code(int command)
{
- int *command = data;
-
- if (*command & FTRACE_UPDATE_CALLS)
+ if (command & FTRACE_UPDATE_CALLS)
ftrace_replace_code(1);
- else if (*command & FTRACE_DISABLE_CALLS)
+ else if (command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
- if (*command & FTRACE_UPDATE_TRACE_FUNC)
+ if (command & FTRACE_UPDATE_TRACE_FUNC)
ftrace_update_ftrace_func(ftrace_trace_function);
- if (*command & FTRACE_START_FUNC_RET)
+ if (command & FTRACE_START_FUNC_RET)
ftrace_enable_ftrace_graph_caller();
- else if (*command & FTRACE_STOP_FUNC_RET)
+ else if (command & FTRACE_STOP_FUNC_RET)
ftrace_disable_ftrace_graph_caller();
+}
+
+static int __ftrace_modify_code(void *data)
+{
+ int *command = data;
+
+ ftrace_modify_all_code(*command);
return 0;
}
@@ -3666,22 +3656,36 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
return 0;
}
-static void ftrace_swap_recs(void *a, void *b, int size)
+static int ftrace_cmp_ips(const void *a, const void *b)
+{
+ const unsigned long *ipa = a;
+ const unsigned long *ipb = b;
+
+ if (*ipa > *ipb)
+ return 1;
+ if (*ipa < *ipb)
+ return -1;
+ return 0;
+}
+
+static void ftrace_swap_ips(void *a, void *b, int size)
{
- struct dyn_ftrace *reca = a;
- struct dyn_ftrace *recb = b;
- struct dyn_ftrace t;
+ unsigned long *ipa = a;
+ unsigned long *ipb = b;
+ unsigned long t;
- t = *reca;
- *reca = *recb;
- *recb = t;
+ t = *ipa;
+ *ipa = *ipb;
+ *ipb = t;
}
static int ftrace_process_locs(struct module *mod,
unsigned long *start,
unsigned long *end)
{
+ struct ftrace_page *start_pg;
struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
unsigned long count;
unsigned long *p;
unsigned long addr;
@@ -3693,8 +3697,11 @@ static int ftrace_process_locs(struct module *mod,
if (!count)
return 0;
- pg = ftrace_allocate_pages(count);
- if (!pg)
+ sort(start, count, sizeof(*start),
+ ftrace_cmp_ips, ftrace_swap_ips);
+
+ start_pg = ftrace_allocate_pages(count);
+ if (!start_pg)
return -ENOMEM;
mutex_lock(&ftrace_lock);
@@ -3707,7 +3714,7 @@ static int ftrace_process_locs(struct module *mod,
if (!mod) {
WARN_ON(ftrace_pages || ftrace_pages_start);
/* First initialization */
- ftrace_pages = ftrace_pages_start = pg;
+ ftrace_pages = ftrace_pages_start = start_pg;
} else {
if (!ftrace_pages)
goto out;
@@ -3718,11 +3725,11 @@ static int ftrace_process_locs(struct module *mod,
ftrace_pages = ftrace_pages->next;
}
- ftrace_pages->next = pg;
- ftrace_pages = pg;
+ ftrace_pages->next = start_pg;
}
p = start;
+ pg = start_pg;
while (p < end) {
addr = ftrace_call_adjust(*p++);
/*
@@ -3733,17 +3740,26 @@ static int ftrace_process_locs(struct module *mod,
*/
if (!addr)
continue;
- if (!ftrace_record_ip(addr))
- break;
+
+ if (pg->index == pg->size) {
+ /* We should have allocated enough */
+ if (WARN_ON(!pg->next))
+ break;
+ pg = pg->next;
+ }
+
+ rec = &pg->records[pg->index++];
+ rec->ip = addr;
}
- /* These new locations need to be initialized */
- ftrace_new_pgs = pg;
+ /* We should have used all pages */
+ WARN_ON(pg->next);
- /* Make each individual set of pages sorted by ips */
- for (; pg; pg = pg->next)
- sort(pg->records, pg->index, sizeof(struct dyn_ftrace),
- ftrace_cmp_recs, ftrace_swap_recs);
+ /* Assign the last page to ftrace_pages */
+ ftrace_pages = pg;
+
+ /* These new locations need to be initialized */
+ ftrace_new_pgs = start_pg;
/*
* We only need to disable interrupts on start up
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2d5eb332082..68388f876d4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -23,6 +23,8 @@
#include <asm/local.h>
#include "trace.h"
+static void update_pages_handler(struct work_struct *work);
+
/*
* The ring buffer header is special. We must manually up keep it.
*/
@@ -470,12 +472,15 @@ struct ring_buffer_per_cpu {
/* ring buffer pages to update, > 0 to add, < 0 to remove */
int nr_pages_to_update;
struct list_head new_pages; /* new pages to add */
+ struct work_struct update_pages_work;
+ struct completion update_completion;
};
struct ring_buffer {
unsigned flags;
int cpus;
atomic_t record_disabled;
+ atomic_t resize_disabled;
cpumask_var_t cpumask;
struct lock_class_key *reader_lock_key;
@@ -940,6 +945,10 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
struct list_head *head = cpu_buffer->pages;
struct buffer_page *bpage, *tmp;
+ /* Reset the head page if it exists */
+ if (cpu_buffer->head_page)
+ rb_set_head_page(cpu_buffer);
+
rb_head_page_deactivate(cpu_buffer);
if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
@@ -1048,6 +1057,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
raw_spin_lock_init(&cpu_buffer->reader_lock);
lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+ INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
+ init_completion(&cpu_buffer->update_completion);
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu));
@@ -1235,70 +1246,222 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,
static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
-static void
-rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
+static inline unsigned long rb_page_entries(struct buffer_page *bpage)
{
- struct buffer_page *bpage;
- struct list_head *p;
- unsigned i;
+ return local_read(&bpage->entries) & RB_WRITE_MASK;
+}
+
+static inline unsigned long rb_page_write(struct buffer_page *bpage)
+{
+ return local_read(&bpage->write) & RB_WRITE_MASK;
+}
+
+static int
+rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages)
+{
+ struct list_head *tail_page, *to_remove, *next_page;
+ struct buffer_page *to_remove_page, *tmp_iter_page;
+ struct buffer_page *last_page, *first_page;
+ unsigned int nr_removed;
+ unsigned long head_bit;
+ int page_entries;
+
+ head_bit = 0;
raw_spin_lock_irq(&cpu_buffer->reader_lock);
- rb_head_page_deactivate(cpu_buffer);
+ atomic_inc(&cpu_buffer->record_disabled);
+ /*
+ * We don't race with the readers since we have acquired the reader
+ * lock. We also don't race with writers after disabling recording.
+ * This makes it easy to figure out the first and the last page to be
+ * removed from the list. We unlink all the pages in between including
+ * the first and last pages. This is done in a busy loop so that we
+ * lose the least number of traces.
+ * The pages are freed after we restart recording and unlock readers.
+ */
+ tail_page = &cpu_buffer->tail_page->list;
- for (i = 0; i < nr_pages; i++) {
- if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
- goto out;
- p = cpu_buffer->pages->next;
- bpage = list_entry(p, struct buffer_page, list);
- list_del_init(&bpage->list);
- free_buffer_page(bpage);
+ /*
+ * tail page might be on reader page, we remove the next page
+ * from the ring buffer
+ */
+ if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+ tail_page = rb_list_head(tail_page->next);
+ to_remove = tail_page;
+
+ /* start of pages to remove */
+ first_page = list_entry(rb_list_head(to_remove->next),
+ struct buffer_page, list);
+
+ for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
+ to_remove = rb_list_head(to_remove)->next;
+ head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
}
- if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
- goto out;
- rb_reset_cpu(cpu_buffer);
- rb_check_pages(cpu_buffer);
+ next_page = rb_list_head(to_remove)->next;
-out:
+ /*
+ * Now we remove all pages between tail_page and next_page.
+ * Make sure that we have head_bit value preserved for the
+ * next page
+ */
+ tail_page->next = (struct list_head *)((unsigned long)next_page |
+ head_bit);
+ next_page = rb_list_head(next_page);
+ next_page->prev = tail_page;
+
+ /* make sure pages points to a valid page in the ring buffer */
+ cpu_buffer->pages = next_page;
+
+ /* update head page */
+ if (head_bit)
+ cpu_buffer->head_page = list_entry(next_page,
+ struct buffer_page, list);
+
+ /*
+ * change read pointer to make sure any read iterators reset
+ * themselves
+ */
+ cpu_buffer->read = 0;
+
+ /* pages are removed, resume tracing and then free the pages */
+ atomic_dec(&cpu_buffer->record_disabled);
raw_spin_unlock_irq(&cpu_buffer->reader_lock);
+
+ RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
+
+ /* last buffer page to remove */
+ last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
+ list);
+ tmp_iter_page = first_page;
+
+ do {
+ to_remove_page = tmp_iter_page;
+ rb_inc_page(cpu_buffer, &tmp_iter_page);
+
+ /* update the counters */
+ page_entries = rb_page_entries(to_remove_page);
+ if (page_entries) {
+ /*
+ * If something was added to this page, it was full
+ * since it is not the tail page. So we deduct the
+ * bytes consumed in ring buffer from here.
+ * No need to update overruns, since this page is
+ * deleted from ring buffer and its entries are
+ * already accounted for.
+ */
+ local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ }
+
+ /*
+ * We have already removed references to this list item, just
+ * free up the buffer_page and its page
+ */
+ free_buffer_page(to_remove_page);
+ nr_removed--;
+
+ } while (to_remove_page != last_page);
+
+ RB_WARN_ON(cpu_buffer, nr_removed);
+
+ return nr_removed == 0;
}
-static void
-rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
- struct list_head *pages, unsigned nr_pages)
+static int
+rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
- struct buffer_page *bpage;
- struct list_head *p;
- unsigned i;
+ struct list_head *pages = &cpu_buffer->new_pages;
+ int retries, success;
raw_spin_lock_irq(&cpu_buffer->reader_lock);
- rb_head_page_deactivate(cpu_buffer);
+ /*
+ * We are holding the reader lock, so the reader page won't be swapped
+ * in the ring buffer. Now we are racing with the writer trying to
+ * move head page and the tail page.
+ * We are going to adapt the reader page update process where:
+ * 1. We first splice the start and end of list of new pages between
+ * the head page and its previous page.
+ * 2. We cmpxchg the prev_page->next to point from head page to the
+ * start of new pages list.
+ * 3. Finally, we update the head->prev to the end of new list.
+ *
+ * We will try this process 10 times, to make sure that we don't keep
+ * spinning.
+ */
+ retries = 10;
+ success = 0;
+ while (retries--) {
+ struct list_head *head_page, *prev_page, *r;
+ struct list_head *last_page, *first_page;
+ struct list_head *head_page_with_bit;
- for (i = 0; i < nr_pages; i++) {
- if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
- goto out;
- p = pages->next;
- bpage = list_entry(p, struct buffer_page, list);
- list_del_init(&bpage->list);
- list_add_tail(&bpage->list, cpu_buffer->pages);
+ head_page = &rb_set_head_page(cpu_buffer)->list;
+ prev_page = head_page->prev;
+
+ first_page = pages->next;
+ last_page = pages->prev;
+
+ head_page_with_bit = (struct list_head *)
+ ((unsigned long)head_page | RB_PAGE_HEAD);
+
+ last_page->next = head_page_with_bit;
+ first_page->prev = prev_page;
+
+ r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
+
+ if (r == head_page_with_bit) {
+ /*
+ * yay, we replaced the page pointer to our new list,
+ * now, we just have to update the head page's prev
+ * pointer to point to the end of the list
+ */
+ head_page->prev = last_page;
+ success = 1;
+ break;
+ }
}
- rb_reset_cpu(cpu_buffer);
- rb_check_pages(cpu_buffer);
-out:
+ if (success)
+ INIT_LIST_HEAD(pages);
+ /*
+ * If we weren't successful in adding in new pages, warn and stop
+ * tracing
+ */
+ RB_WARN_ON(cpu_buffer, !success);
raw_spin_unlock_irq(&cpu_buffer->reader_lock);
+
+ /* free pages if they weren't inserted */
+ if (!success) {
+ struct buffer_page *bpage, *tmp;
+ list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
+ list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
+ }
+ }
+ return success;
}
-static void update_pages_handler(struct ring_buffer_per_cpu *cpu_buffer)
+static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
+ int success;
+
if (cpu_buffer->nr_pages_to_update > 0)
- rb_insert_pages(cpu_buffer, &cpu_buffer->new_pages,
- cpu_buffer->nr_pages_to_update);
+ success = rb_insert_pages(cpu_buffer);
else
- rb_remove_pages(cpu_buffer, -cpu_buffer->nr_pages_to_update);
- cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
- /* reset this value */
- cpu_buffer->nr_pages_to_update = 0;
+ success = rb_remove_pages(cpu_buffer,
+ -cpu_buffer->nr_pages_to_update);
+
+ if (success)
+ cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
+}
+
+static void update_pages_handler(struct work_struct *work)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
+ struct ring_buffer_per_cpu, update_pages_work);
+ rb_update_pages(cpu_buffer);
+ complete(&cpu_buffer->update_completion);
}
/**
@@ -1308,14 +1471,14 @@ static void update_pages_handler(struct ring_buffer_per_cpu *cpu_buffer)
*
* Minimum size is 2 * BUF_PAGE_SIZE.
*
- * Returns -1 on failure.
+ * Returns 0 on success and < 0 on failure.
*/
int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
int cpu_id)
{
struct ring_buffer_per_cpu *cpu_buffer;
unsigned nr_pages;
- int cpu;
+ int cpu, err = 0;
/*
* Always succeed at resizing a non-existent buffer:
@@ -1330,15 +1493,18 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
if (size < BUF_PAGE_SIZE * 2)
size = BUF_PAGE_SIZE * 2;
- atomic_inc(&buffer->record_disabled);
+ nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
- /* Make sure all writers are done with this buffer. */
- synchronize_sched();
+ /*
+ * Don't succeed if resizing is disabled, as a reader might be
+ * manipulating the ring buffer and is expecting a sane state while
+ * this is true.
+ */
+ if (atomic_read(&buffer->resize_disabled))
+ return -EBUSY;
+ /* prevent another thread from changing buffer sizes */
mutex_lock(&buffer->mutex);
- get_online_cpus();
-
- nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
if (cpu_id == RING_BUFFER_ALL_CPUS) {
/* calculate the pages to update */
@@ -1347,33 +1513,67 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
cpu_buffer->nr_pages_to_update = nr_pages -
cpu_buffer->nr_pages;
-
/*
* nothing more to do for removing pages or no update
*/
if (cpu_buffer->nr_pages_to_update <= 0)
continue;
-
/*
* to add pages, make sure all new pages can be
* allocated without receiving ENOMEM
*/
INIT_LIST_HEAD(&cpu_buffer->new_pages);
if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
- &cpu_buffer->new_pages, cpu))
+ &cpu_buffer->new_pages, cpu)) {
/* not enough memory for new pages */
- goto no_mem;
+ err = -ENOMEM;
+ goto out_err;
+ }
+ }
+
+ get_online_cpus();
+ /*
+ * Fire off all the required work handlers
+ * Look out for offline CPUs
+ */
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ if (!cpu_buffer->nr_pages_to_update ||
+ !cpu_online(cpu))
+ continue;
+
+ schedule_work_on(cpu, &cpu_buffer->update_pages_work);
+ }
+ /*
+ * This loop is for the CPUs that are not online.
+ * We can't schedule anything on them, but it's not necessary
+ * since we can change their buffer sizes without any race.
+ */
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ if (!cpu_buffer->nr_pages_to_update ||
+ cpu_online(cpu))
+ continue;
+
+ rb_update_pages(cpu_buffer);
}
/* wait for all the updates to complete */
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
- if (cpu_buffer->nr_pages_to_update) {
- update_pages_handler(cpu_buffer);
- }
+ if (!cpu_buffer->nr_pages_to_update ||
+ !cpu_online(cpu))
+ continue;
+
+ wait_for_completion(&cpu_buffer->update_completion);
+ /* reset this value */
+ cpu_buffer->nr_pages_to_update = 0;
}
+
+ put_online_cpus();
} else {
cpu_buffer = buffer->buffers[cpu_id];
+
if (nr_pages == cpu_buffer->nr_pages)
goto out;
@@ -1383,38 +1583,70 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
INIT_LIST_HEAD(&cpu_buffer->new_pages);
if (cpu_buffer->nr_pages_to_update > 0 &&
__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
- &cpu_buffer->new_pages, cpu_id))
- goto no_mem;
+ &cpu_buffer->new_pages, cpu_id)) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ get_online_cpus();
- update_pages_handler(cpu_buffer);
+ if (cpu_online(cpu_id)) {
+ schedule_work_on(cpu_id,
+ &cpu_buffer->update_pages_work);
+ wait_for_completion(&cpu_buffer->update_completion);
+ } else
+ rb_update_pages(cpu_buffer);
+
+ put_online_cpus();
+ /* reset this value */
+ cpu_buffer->nr_pages_to_update = 0;
}
out:
- put_online_cpus();
- mutex_unlock(&buffer->mutex);
-
- atomic_dec(&buffer->record_disabled);
+ /*
+ * The ring buffer resize can happen with the ring buffer
+ * enabled, so that the update disturbs the tracing as little
+ * as possible. But if the buffer is disabled, we do not need
+ * to worry about that, and we can take the time to verify
+ * that the buffer is not corrupt.
+ */
+ if (atomic_read(&buffer->record_disabled)) {
+ atomic_inc(&buffer->record_disabled);
+ /*
+ * Even though the buffer was disabled, we must make sure
+ * that it is truly disabled before calling rb_check_pages.
+ * There could have been a race between checking
+ * record_disabled and incrementing it.
+ */
+ synchronize_sched();
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ rb_check_pages(cpu_buffer);
+ }
+ atomic_dec(&buffer->record_disabled);
+ }
+ mutex_unlock(&buffer->mutex);
return size;
- no_mem:
+ out_err:
for_each_buffer_cpu(buffer, cpu) {
struct buffer_page *bpage, *tmp;
+
cpu_buffer = buffer->buffers[cpu];
- /* reset this number regardless */
cpu_buffer->nr_pages_to_update = 0;
+
if (list_empty(&cpu_buffer->new_pages))
continue;
+
list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
list) {
list_del_init(&bpage->list);
free_buffer_page(bpage);
}
}
- put_online_cpus();
mutex_unlock(&buffer->mutex);
- atomic_dec(&buffer->record_disabled);
- return -ENOMEM;
+ return err;
}
EXPORT_SYMBOL_GPL(ring_buffer_resize);
@@ -1453,21 +1685,11 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
return __rb_page_index(iter->head_page, iter->head);
}
-static inline unsigned long rb_page_write(struct buffer_page *bpage)
-{
- return local_read(&bpage->write) & RB_WRITE_MASK;
-}
-
static inline unsigned rb_page_commit(struct buffer_page *bpage)
{
return local_read(&bpage->page->commit);
}
-static inline unsigned long rb_page_entries(struct buffer_page *bpage)
-{
- return local_read(&bpage->entries) & RB_WRITE_MASK;
-}
-
/* Size is determined by what has been committed */
static inline unsigned rb_page_size(struct buffer_page *bpage)
{
@@ -3492,6 +3714,7 @@ ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
iter->cpu_buffer = cpu_buffer;
+ atomic_inc(&buffer->resize_disabled);
atomic_inc(&cpu_buffer->record_disabled);
return iter;
@@ -3554,7 +3777,14 @@ ring_buffer_read_finish(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ /*
+ * Ring buffer is disabled from recording, here's a good place
+ * to check the integrity of the ring buffer.
+ */
+ rb_check_pages(cpu_buffer);
+
atomic_dec(&cpu_buffer->record_disabled);
+ atomic_dec(&cpu_buffer->buffer->resize_disabled);
kfree(iter);
}
EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
@@ -3626,6 +3856,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->commit_page = cpu_buffer->head_page;
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+ INIT_LIST_HEAD(&cpu_buffer->new_pages);
local_set(&cpu_buffer->reader_page->write, 0);
local_set(&cpu_buffer->reader_page->entries, 0);
local_set(&cpu_buffer->reader_page->page->commit, 0);
@@ -3662,8 +3893,12 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return;
+ atomic_inc(&buffer->resize_disabled);
atomic_inc(&cpu_buffer->record_disabled);
+ /* Make sure all commits have finished */
+ synchronize_sched();
+
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
@@ -3679,6 +3914,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
atomic_dec(&cpu_buffer->record_disabled);
+ atomic_dec(&buffer->resize_disabled);
}
EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 509e8615f50..33ae2f196fa 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2669,10 +2669,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
if (cpumask_test_cpu(cpu, tracing_cpumask) &&
!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_inc(&global_trace.data[cpu]->disabled);
+ ring_buffer_record_disable_cpu(global_trace.buffer, cpu);
}
if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_dec(&global_trace.data[cpu]->disabled);
+ ring_buffer_record_enable_cpu(global_trace.buffer, cpu);
}
}
arch_spin_unlock(&ftrace_max_lock);
@@ -3076,20 +3078,10 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id)
{
- int cpu, ret = size;
+ int ret = size;
mutex_lock(&trace_types_lock);
- tracing_stop();
-
- /* disable all cpu buffers */
- for_each_tracing_cpu(cpu) {
- if (global_trace.data[cpu])
- atomic_inc(&global_trace.data[cpu]->disabled);
- if (max_tr.data[cpu])
- atomic_inc(&max_tr.data[cpu]->disabled);
- }
-
if (cpu_id != RING_BUFFER_ALL_CPUS) {
/* make sure, this cpu is enabled in the mask */
if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
@@ -3103,14 +3095,6 @@ static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id)
ret = -ENOMEM;
out:
- for_each_tracing_cpu(cpu) {
- if (global_trace.data[cpu])
- atomic_dec(&global_trace.data[cpu]->disabled);
- if (max_tr.data[cpu])
- atomic_dec(&max_tr.data[cpu]->disabled);
- }
-
- tracing_start();
mutex_unlock(&trace_types_lock);
return ret;
@@ -3875,14 +3859,14 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
struct print_entry *entry;
unsigned long irq_flags;
struct page *pages[2];
+ void *map_page[2];
int nr_pages = 1;
ssize_t written;
- void *page1;
- void *page2;
int offset;
int size;
int len;
int ret;
+ int i;
if (tracing_disabled)
return -EINVAL;
@@ -3921,9 +3905,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
goto out;
}
- page1 = kmap_atomic(pages[0]);
- if (nr_pages == 2)
- page2 = kmap_atomic(pages[1]);
+ for (i = 0; i < nr_pages; i++)
+ map_page[i] = kmap_atomic(pages[i]);
local_save_flags(irq_flags);
size = sizeof(*entry) + cnt + 2; /* possible \n added */
@@ -3941,10 +3924,10 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (nr_pages == 2) {
len = PAGE_SIZE - offset;
- memcpy(&entry->buf, page1 + offset, len);
- memcpy(&entry->buf[len], page2, cnt - len);
+ memcpy(&entry->buf, map_page[0] + offset, len);
+ memcpy(&entry->buf[len], map_page[1], cnt - len);
} else
- memcpy(&entry->buf, page1 + offset, cnt);
+ memcpy(&entry->buf, map_page[0] + offset, cnt);
if (entry->buf[cnt - 1] != '\n') {
entry->buf[cnt] = '\n';
@@ -3959,11 +3942,10 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
*fpos += written;
out_unlock:
- if (nr_pages == 2)
- kunmap_atomic(page2);
- kunmap_atomic(page1);
- while (nr_pages > 0)
- put_page(pages[--nr_pages]);
+ for (i = 0; i < nr_pages; i++){
+ kunmap_atomic(map_page[i]);
+ put_page(pages[i]);
+ }
out:
return written;
}
@@ -4494,6 +4476,9 @@ static void tracing_init_debugfs_percpu(long cpu)
struct dentry *d_cpu;
char cpu_dir[30]; /* 30 characters should be more than enough */
+ if (!d_percpu)
+ return;
+
snprintf(cpu_dir, 30, "cpu%ld", cpu);
d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
if (!d_cpu) {