Diffstat (limited to 'kernel/events/core.c')
-rw-r--r--	kernel/events/core.c	199
1 file changed, 97 insertions, 102 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d1a1bee3522..58690af323e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -25,11 +25,11 @@
 #include <linux/reboot.h>
 #include <linux/vmstat.h>
 #include <linux/device.h>
+#include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
 #include <linux/uaccess.h>
-#include <linux/suspend.h>
 #include <linux/syscalls.h>
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
@@ -185,6 +185,9 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
 static void update_context_time(struct perf_event_context *ctx);
 static u64 perf_event_time(struct perf_event *event);
 
+static void ring_buffer_attach(struct perf_event *event,
+			       struct ring_buffer *rb);
+
 void __weak perf_event_print_debug(void)	{ }
 
 extern __weak const char *perf_pmu_name(void)
@@ -2171,9 +2174,10 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 	 */
 	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
 
-	perf_event_sched_in(cpuctx, ctx, task);
+	if (ctx->nr_events)
+		cpuctx->task_ctx = ctx;
 
-	cpuctx->task_ctx = ctx;
+	perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);
 
 	perf_pmu_enable(ctx->pmu);
 	perf_ctx_unlock(cpuctx, ctx);
@@ -3190,12 +3194,33 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	struct ring_buffer *rb;
 	unsigned int events = POLL_HUP;
 
+	/*
+	 * Race between perf_event_set_output() and perf_poll(): perf_poll()
+	 * grabs the rb reference but perf_event_set_output() overrides it.
+	 * Here is the timeline for two threads T1, T2:
+	 * t0: T1, rb = rcu_dereference(event->rb)
+	 * t1: T2, old_rb = event->rb
+	 * t2: T2, event->rb = new rb
+	 * t3: T2, ring_buffer_detach(old_rb)
+	 * t4: T1, ring_buffer_attach(rb1)
+	 * t5: T1, poll_wait(event->waitq)
+	 *
+	 * To avoid this problem, we grab mmap_mutex in perf_poll()
+	 * thereby ensuring that the assignment of the new ring buffer
+	 * and the detachment of the old buffer appear atomic to perf_poll()
+	 */
+	mutex_lock(&event->mmap_mutex);
+
 	rcu_read_lock();
 	rb = rcu_dereference(event->rb);
-	if (rb)
+	if (rb) {
+		ring_buffer_attach(event, rb);
 		events = atomic_xchg(&rb->poll, 0);
+	}
 	rcu_read_unlock();
 
+	mutex_unlock(&event->mmap_mutex);
+
 	poll_wait(file, &event->waitq, wait);
 
 	return events;
@@ -3496,6 +3521,53 @@ unlock:
 	return ret;
 }
 
+static void ring_buffer_attach(struct perf_event *event,
+			       struct ring_buffer *rb)
+{
+	unsigned long flags;
+
+	if (!list_empty(&event->rb_entry))
+		return;
+
+	spin_lock_irqsave(&rb->event_lock, flags);
+	if (!list_empty(&event->rb_entry))
+		goto unlock;
+
+	list_add(&event->rb_entry, &rb->event_list);
+unlock:
+	spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_detach(struct perf_event *event,
+			       struct ring_buffer *rb)
+{
+	unsigned long flags;
+
+	if (list_empty(&event->rb_entry))
+		return;
+
+	spin_lock_irqsave(&rb->event_lock, flags);
+	list_del_init(&event->rb_entry);
+	wake_up_all(&event->waitq);
+	spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_wakeup(struct perf_event *event)
+{
+	struct ring_buffer *rb;
+
+	rcu_read_lock();
+	rb = rcu_dereference(event->rb);
+	if (!rb)
+		goto unlock;
+
+	list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
+		wake_up_all(&event->waitq);
+
+unlock:
+	rcu_read_unlock();
+}
+
 static void rb_free_rcu(struct rcu_head *rcu_head)
 {
 	struct ring_buffer *rb;
@@ -3521,9 +3593,19 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
 
 static void ring_buffer_put(struct ring_buffer *rb)
 {
+	struct perf_event *event, *n;
+	unsigned long flags;
+
 	if (!atomic_dec_and_test(&rb->refcount))
 		return;
 
+	spin_lock_irqsave(&rb->event_lock, flags);
+	list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
+		list_del_init(&event->rb_entry);
+		wake_up_all(&event->waitq);
+	}
+	spin_unlock_irqrestore(&rb->event_lock, flags);
+
 	call_rcu(&rb->rcu_head, rb_free_rcu);
 }
 
@@ -3544,8 +3626,9 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 		struct ring_buffer *rb = event->rb;
 
 		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
-		vma->vm_mm->locked_vm -= event->mmap_locked;
+		vma->vm_mm->pinned_vm -= event->mmap_locked;
 		rcu_assign_pointer(event->rb, NULL);
+		ring_buffer_detach(event, rb);
 		mutex_unlock(&event->mmap_mutex);
 
 		ring_buffer_put(rb);
@@ -3625,7 +3708,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
 	lock_limit = rlimit(RLIMIT_MEMLOCK);
 	lock_limit >>= PAGE_SHIFT;
-	locked = vma->vm_mm->locked_vm + extra;
+	locked = vma->vm_mm->pinned_vm + extra;
 
 	if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
 		!capable(CAP_IPC_LOCK)) {
@@ -3651,7 +3734,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	atomic_long_add(user_extra, &user->locked_vm);
 	event->mmap_locked = extra;
 	event->mmap_user = get_current_user();
-	vma->vm_mm->locked_vm += event->mmap_locked;
+	vma->vm_mm->pinned_vm += event->mmap_locked;
 
 unlock:
 	if (!ret)
@@ -3700,7 +3783,7 @@ static const struct file_operations perf_fops = {
 
 void perf_event_wakeup(struct perf_event *event)
 {
-	wake_up_all(&event->waitq);
+	ring_buffer_wakeup(event);
 
 	if (event->pending_kill) {
 		kill_fasync(&event->fasync, SIGIO, event->pending_kill);
@@ -5822,6 +5905,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	INIT_LIST_HEAD(&event->group_entry);
 	INIT_LIST_HEAD(&event->event_entry);
 	INIT_LIST_HEAD(&event->sibling_list);
+	INIT_LIST_HEAD(&event->rb_entry);
+
 	init_waitqueue_head(&event->waitq);
 	init_irq_work(&event->pending, perf_pending_event);
 
@@ -6028,6 +6113,8 @@ set:
 
 	old_rb = event->rb;
 	rcu_assign_pointer(event->rb, rb);
+	if (old_rb)
+		ring_buffer_detach(event, old_rb);
 	ret = 0;
 unlock:
 	mutex_unlock(&event->mmap_mutex);
@@ -6853,7 +6940,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
 	mutex_lock(&swhash->hlist_mutex);
-	if (swhash->hlist_refcount > 0 && !swhash->swevent_hlist) {
+	if (swhash->hlist_refcount > 0) {
 		struct swevent_hlist *hlist;
 
 		hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
@@ -6942,14 +7029,7 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long)hcpu;
 
-	/*
-	 * Ignore suspend/resume action, the perf_pm_notifier will
-	 * take care of that.
-	 */
-	if (action & CPU_TASKS_FROZEN)
-		return NOTIFY_OK;
-
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 
 	case CPU_UP_PREPARE:
 	case CPU_DOWN_FAILED:
@@ -6968,90 +7048,6 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 	return NOTIFY_OK;
 }
 
-static void perf_pm_resume_cpu(void *unused)
-{
-	struct perf_cpu_context *cpuctx;
-	struct perf_event_context *ctx;
-	struct pmu *pmu;
-	int idx;
-
-	idx = srcu_read_lock(&pmus_srcu);
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-		ctx = cpuctx->task_ctx;
-
-		perf_ctx_lock(cpuctx, ctx);
-		perf_pmu_disable(cpuctx->ctx.pmu);
-
-		cpu_ctx_sched_out(cpuctx, EVENT_ALL);
-		if (ctx)
-			ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-
-		perf_pmu_enable(cpuctx->ctx.pmu);
-		perf_ctx_unlock(cpuctx, ctx);
-	}
-	srcu_read_unlock(&pmus_srcu, idx);
-}
-
-static void perf_pm_suspend_cpu(void *unused)
-{
-	struct perf_cpu_context *cpuctx;
-	struct perf_event_context *ctx;
-	struct pmu *pmu;
-	int idx;
-
-	idx = srcu_read_lock(&pmus_srcu);
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-		ctx = cpuctx->task_ctx;
-
-		perf_ctx_lock(cpuctx, ctx);
-		perf_pmu_disable(cpuctx->ctx.pmu);
-
-		perf_event_sched_in(cpuctx, ctx, current);
-
-		perf_pmu_enable(cpuctx->ctx.pmu);
-		perf_ctx_unlock(cpuctx, ctx);
-	}
-	srcu_read_unlock(&pmus_srcu, idx);
-}
-
-static int perf_resume(void)
-{
-	get_online_cpus();
-	smp_call_function(perf_pm_resume_cpu, NULL, 1);
-	put_online_cpus();
-
-	return NOTIFY_OK;
-}
-
-static int perf_suspend(void)
-{
-	get_online_cpus();
-	smp_call_function(perf_pm_suspend_cpu, NULL, 1);
-	put_online_cpus();
-
-	return NOTIFY_OK;
-}
-
-static int perf_pm(struct notifier_block *self, unsigned long action, void *ptr)
-{
-	switch (action) {
-	case PM_POST_HIBERNATION:
-	case PM_POST_SUSPEND:
-		return perf_resume();
-	case PM_HIBERNATION_PREPARE:
-	case PM_SUSPEND_PREPARE:
-		return perf_suspend();
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static struct notifier_block perf_pm_notifier = {
-	.notifier_call = perf_pm,
-};
-
 void __init perf_event_init(void)
 {
 	int ret;
@@ -7066,7 +7062,6 @@ void __init perf_event_init(void)
 	perf_tp_register();
 	perf_cpu_notifier(perf_cpu_notify);
 	register_reboot_notifier(&perf_reboot_notifier);
-	register_pm_notifier(&perf_pm_notifier);
 
 	ret = init_hw_breakpoint();
 	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
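Note on the ring-buffer changes above: with PERF_EVENT_IOC_SET_OUTPUT several events can share one ring buffer, and the new rb->event_list / ring_buffer_wakeup() machinery makes a wakeup on that buffer reach the waitqueue of every attached event instead of only one of them. The userspace sketch below is not part of this patch; it merely illustrates such a multi-event setup under stated assumptions (the usual raw perf_event_open(2) syscall wrapper, arbitrary task-clock sampling parameters, and a 1+8 page mmap chosen purely for the example).

/* Illustrative userspace sketch only -- not from this patch. */
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>

/* glibc provides no wrapper for perf_event_open(2), so call it directly */
static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	struct pollfd pfd[2];
	volatile long spin;
	void *base;
	int fd1, fd2;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;
	attr.sample_type = PERF_SAMPLE_IP;
	attr.sample_period = 100000;	/* arbitrary period for the example */
	attr.wakeup_events = 1;		/* request a wakeup for every sample */

	fd1 = perf_event_open(&attr, 0, -1, -1, 0);	/* will own the buffer */
	fd2 = perf_event_open(&attr, 0, -1, -1, 0);	/* will be redirected  */
	if (fd1 < 0 || fd2 < 0)
		return 1;

	/* 1 metadata page + 8 data pages, mmap()ed on fd1 only */
	base = mmap(NULL, 9 * 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
	if (base == MAP_FAILED)
		return 1;

	/* fd2 gets no buffer of its own; its samples land in fd1's buffer */
	if (ioctl(fd2, PERF_EVENT_IOC_SET_OUTPUT, fd1))
		return 1;

	/* burn some CPU so the task clock ticks and samples are written */
	for (spin = 0; spin < 50000000; spin++)
		;

	/*
	 * ring_buffer_wakeup() walks rb->event_list, so a wakeup on the
	 * shared buffer reaches the waitqueues of both events, whichever
	 * descriptor the consumer happens to sleep on.
	 */
	pfd[0].fd = fd1;
	pfd[0].events = POLLIN;
	pfd[1].fd = fd2;
	pfd[1].events = POLLIN;
	poll(pfd, 2, 1000);

	printf("revents: fd1=0x%x fd2=0x%x\n", pfd[0].revents, pfd[1].revents);
	munmap(base, 9 * 4096);
	return 0;
}

Which of the two descriptors reports POLLIN in revents depends on which poll() call consumes the buffer's poll flag first; the point of the change is that the wakeup itself is delivered to every event attached to the buffer, so a consumer sleeping on either descriptor is notified.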