summaryrefslogtreecommitdiffstats
path: root/kernel/events/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/events/core.c')
-rw-r--r--kernel/events/core.c199
1 files changed, 97 insertions, 102 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d1a1bee3522..58690af323e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -25,11 +25,11 @@
#include <linux/reboot.h>
#include <linux/vmstat.h>
#include <linux/device.h>
+#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
-#include <linux/suspend.h>
#include <linux/syscalls.h>
#include <linux/anon_inodes.h>
#include <linux/kernel_stat.h>
@@ -185,6 +185,9 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
static void update_context_time(struct perf_event_context *ctx);
static u64 perf_event_time(struct perf_event *event);
+static void ring_buffer_attach(struct perf_event *event,
+ struct ring_buffer *rb);
+
void __weak perf_event_print_debug(void) { }
extern __weak const char *perf_pmu_name(void)
@@ -2171,9 +2174,10 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
*/
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
- perf_event_sched_in(cpuctx, ctx, task);
+ if (ctx->nr_events)
+ cpuctx->task_ctx = ctx;
- cpuctx->task_ctx = ctx;
+ perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);
perf_pmu_enable(ctx->pmu);
perf_ctx_unlock(cpuctx, ctx);
@@ -3190,12 +3194,33 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
struct ring_buffer *rb;
unsigned int events = POLL_HUP;
+ /*
+ * Race between perf_event_set_output() and perf_poll(): perf_poll()
+ * grabs the rb reference but perf_event_set_output() overrides it.
+ * Here is the timeline for two threads T1, T2:
+ * t0: T1, rb = rcu_dereference(event->rb)
+ * t1: T2, old_rb = event->rb
+ * t2: T2, event->rb = new rb
+ * t3: T2, ring_buffer_detach(old_rb)
+ * t4: T1, ring_buffer_attach(rb1)
+ * t5: T1, poll_wait(event->waitq)
+ *
+ * To avoid this problem, we grab mmap_mutex in perf_poll()
+ * thereby ensuring that the assignment of the new ring buffer
+ * and the detachment of the old buffer appear atomic to perf_poll()
+ */
+ mutex_lock(&event->mmap_mutex);
+
rcu_read_lock();
rb = rcu_dereference(event->rb);
- if (rb)
+ if (rb) {
+ ring_buffer_attach(event, rb);
events = atomic_xchg(&rb->poll, 0);
+ }
rcu_read_unlock();
+ mutex_unlock(&event->mmap_mutex);
+
poll_wait(file, &event->waitq, wait);
return events;
@@ -3496,6 +3521,53 @@ unlock:
return ret;
}
+static void ring_buffer_attach(struct perf_event *event,
+ struct ring_buffer *rb)
+{
+ unsigned long flags;
+
+ if (!list_empty(&event->rb_entry))
+ return;
+
+ spin_lock_irqsave(&rb->event_lock, flags);
+ if (!list_empty(&event->rb_entry))
+ goto unlock;
+
+ list_add(&event->rb_entry, &rb->event_list);
+unlock:
+ spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_detach(struct perf_event *event,
+ struct ring_buffer *rb)
+{
+ unsigned long flags;
+
+ if (list_empty(&event->rb_entry))
+ return;
+
+ spin_lock_irqsave(&rb->event_lock, flags);
+ list_del_init(&event->rb_entry);
+ wake_up_all(&event->waitq);
+ spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_wakeup(struct perf_event *event)
+{
+ struct ring_buffer *rb;
+
+ rcu_read_lock();
+ rb = rcu_dereference(event->rb);
+ if (!rb)
+ goto unlock;
+
+ list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
+ wake_up_all(&event->waitq);
+
+unlock:
+ rcu_read_unlock();
+}
+
static void rb_free_rcu(struct rcu_head *rcu_head)
{
struct ring_buffer *rb;
@@ -3521,9 +3593,19 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
static void ring_buffer_put(struct ring_buffer *rb)
{
+ struct perf_event *event, *n;
+ unsigned long flags;
+
if (!atomic_dec_and_test(&rb->refcount))
return;
+ spin_lock_irqsave(&rb->event_lock, flags);
+ list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
+ list_del_init(&event->rb_entry);
+ wake_up_all(&event->waitq);
+ }
+ spin_unlock_irqrestore(&rb->event_lock, flags);
+
call_rcu(&rb->rcu_head, rb_free_rcu);
}
@@ -3544,8 +3626,9 @@ static void perf_mmap_close(struct vm_area_struct *vma)
struct ring_buffer *rb = event->rb;
atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
- vma->vm_mm->locked_vm -= event->mmap_locked;
+ vma->vm_mm->pinned_vm -= event->mmap_locked;
rcu_assign_pointer(event->rb, NULL);
+ ring_buffer_detach(event, rb);
mutex_unlock(&event->mmap_mutex);
ring_buffer_put(rb);
@@ -3625,7 +3708,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
lock_limit = rlimit(RLIMIT_MEMLOCK);
lock_limit >>= PAGE_SHIFT;
- locked = vma->vm_mm->locked_vm + extra;
+ locked = vma->vm_mm->pinned_vm + extra;
if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
!capable(CAP_IPC_LOCK)) {
@@ -3651,7 +3734,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
atomic_long_add(user_extra, &user->locked_vm);
event->mmap_locked = extra;
event->mmap_user = get_current_user();
- vma->vm_mm->locked_vm += event->mmap_locked;
+ vma->vm_mm->pinned_vm += event->mmap_locked;
unlock:
if (!ret)
@@ -3700,7 +3783,7 @@ static const struct file_operations perf_fops = {
void perf_event_wakeup(struct perf_event *event)
{
- wake_up_all(&event->waitq);
+ ring_buffer_wakeup(event);
if (event->pending_kill) {
kill_fasync(&event->fasync, SIGIO, event->pending_kill);
@@ -5822,6 +5905,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
INIT_LIST_HEAD(&event->group_entry);
INIT_LIST_HEAD(&event->event_entry);
INIT_LIST_HEAD(&event->sibling_list);
+ INIT_LIST_HEAD(&event->rb_entry);
+
init_waitqueue_head(&event->waitq);
init_irq_work(&event->pending, perf_pending_event);
@@ -6028,6 +6113,8 @@ set:
old_rb = event->rb;
rcu_assign_pointer(event->rb, rb);
+ if (old_rb)
+ ring_buffer_detach(event, old_rb);
ret = 0;
unlock:
mutex_unlock(&event->mmap_mutex);
@@ -6853,7 +6940,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
mutex_lock(&swhash->hlist_mutex);
- if (swhash->hlist_refcount > 0 && !swhash->swevent_hlist) {
+ if (swhash->hlist_refcount > 0) {
struct swevent_hlist *hlist;
hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
@@ -6942,14 +7029,7 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
- /*
- * Ignore suspend/resume action, the perf_pm_notifier will
- * take care of that.
- */
- if (action & CPU_TASKS_FROZEN)
- return NOTIFY_OK;
-
- switch (action) {
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_UP_PREPARE:
case CPU_DOWN_FAILED:
@@ -6968,90 +7048,6 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
-static void perf_pm_resume_cpu(void *unused)
-{
- struct perf_cpu_context *cpuctx;
- struct perf_event_context *ctx;
- struct pmu *pmu;
- int idx;
-
- idx = srcu_read_lock(&pmus_srcu);
- list_for_each_entry_rcu(pmu, &pmus, entry) {
- cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
- ctx = cpuctx->task_ctx;
-
- perf_ctx_lock(cpuctx, ctx);
- perf_pmu_disable(cpuctx->ctx.pmu);
-
- cpu_ctx_sched_out(cpuctx, EVENT_ALL);
- if (ctx)
- ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-
- perf_pmu_enable(cpuctx->ctx.pmu);
- perf_ctx_unlock(cpuctx, ctx);
- }
- srcu_read_unlock(&pmus_srcu, idx);
-}
-
-static void perf_pm_suspend_cpu(void *unused)
-{
- struct perf_cpu_context *cpuctx;
- struct perf_event_context *ctx;
- struct pmu *pmu;
- int idx;
-
- idx = srcu_read_lock(&pmus_srcu);
- list_for_each_entry_rcu(pmu, &pmus, entry) {
- cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
- ctx = cpuctx->task_ctx;
-
- perf_ctx_lock(cpuctx, ctx);
- perf_pmu_disable(cpuctx->ctx.pmu);
-
- perf_event_sched_in(cpuctx, ctx, current);
-
- perf_pmu_enable(cpuctx->ctx.pmu);
- perf_ctx_unlock(cpuctx, ctx);
- }
- srcu_read_unlock(&pmus_srcu, idx);
-}
-
-static int perf_resume(void)
-{
- get_online_cpus();
- smp_call_function(perf_pm_resume_cpu, NULL, 1);
- put_online_cpus();
-
- return NOTIFY_OK;
-}
-
-static int perf_suspend(void)
-{
- get_online_cpus();
- smp_call_function(perf_pm_suspend_cpu, NULL, 1);
- put_online_cpus();
-
- return NOTIFY_OK;
-}
-
-static int perf_pm(struct notifier_block *self, unsigned long action, void *ptr)
-{
- switch (action) {
- case PM_POST_HIBERNATION:
- case PM_POST_SUSPEND:
- return perf_resume();
- case PM_HIBERNATION_PREPARE:
- case PM_SUSPEND_PREPARE:
- return perf_suspend();
- default:
- return NOTIFY_DONE;
- }
-}
-
-static struct notifier_block perf_pm_notifier = {
- .notifier_call = perf_pm,
-};
-
void __init perf_event_init(void)
{
int ret;
@@ -7066,7 +7062,6 @@ void __init perf_event_init(void)
perf_tp_register();
perf_cpu_notifier(perf_cpu_notify);
register_reboot_notifier(&perf_reboot_notifier);
- register_pm_notifier(&perf_pm_notifier);
ret = init_hw_breakpoint();
WARN(ret, "hw_breakpoint initialization failed with: %d", ret);