diff options
Diffstat (limited to 'kernel/events/core.c')
-rw-r--r-- | kernel/events/core.c | 359 |
1 files changed, 100 insertions, 259 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 58690af323e..ba36013cfb2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4,7 +4,7 @@ * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> - * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> * * For licensing details see kernel-base/COPYING */ @@ -128,7 +128,7 @@ enum event_type_t { * perf_sched_events : >0 events exist * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu */ -struct jump_label_key perf_sched_events __read_mostly; +struct jump_label_key_deferred perf_sched_events __read_mostly; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static atomic_t nr_mmap_events __read_mostly; @@ -815,7 +815,7 @@ static void update_event_times(struct perf_event *event) * here. */ if (is_cgroup_event(event)) - run_end = perf_event_time(event); + run_end = perf_cgroup_event_time(event); else if (ctx->is_active) run_end = ctx->time; else @@ -1130,6 +1130,8 @@ event_sched_out(struct perf_event *event, if (!is_software_event(event)) cpuctx->active_oncpu--; ctx->nr_active--; + if (event->attr.freq && event->attr.sample_freq) + ctx->nr_freq--; if (event->attr.exclusive || !cpuctx->active_oncpu) cpuctx->exclusive = 0; } @@ -1325,6 +1327,7 @@ retry: } raw_spin_unlock_irq(&ctx->lock); } +EXPORT_SYMBOL_GPL(perf_event_disable); static void perf_set_shadow_time(struct perf_event *event, struct perf_event_context *ctx, @@ -1406,6 +1409,8 @@ event_sched_in(struct perf_event *event, if (!is_software_event(event)) cpuctx->active_oncpu++; ctx->nr_active++; + if (event->attr.freq && event->attr.sample_freq) + ctx->nr_freq++; if (event->attr.exclusive) cpuctx->exclusive = 1; @@ -1662,8 +1667,7 @@ retry: * Note: this works for group members as well as group leaders * since the non-leader members' sibling_lists will be empty. */ -static void __perf_event_mark_enabled(struct perf_event *event, - struct perf_event_context *ctx) +static void __perf_event_mark_enabled(struct perf_event *event) { struct perf_event *sub; u64 tstamp = perf_event_time(event); @@ -1701,7 +1705,7 @@ static int __perf_event_enable(void *info) */ perf_cgroup_set_timestamp(current, ctx); - __perf_event_mark_enabled(event, ctx); + __perf_event_mark_enabled(event); if (!event_filter_match(event)) { if (is_cgroup_event(event)) @@ -1782,7 +1786,7 @@ void perf_event_enable(struct perf_event *event) retry: if (!ctx->is_active) { - __perf_event_mark_enabled(event, ctx); + __perf_event_mark_enabled(event); goto out; } @@ -1809,6 +1813,7 @@ retry: out: raw_spin_unlock_irq(&ctx->lock); } +EXPORT_SYMBOL_GPL(perf_event_enable); int perf_event_refresh(struct perf_event *event, int refresh) { @@ -2295,6 +2300,9 @@ do { \ return div64_u64(dividend, divisor); } +static DEFINE_PER_CPU(int, perf_throttled_count); +static DEFINE_PER_CPU(u64, perf_throttled_seq); + static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) { struct hw_perf_event *hwc = &event->hw; @@ -2320,13 +2328,29 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) } } -static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) +/* + * combine freq adjustment with unthrottling to avoid two passes over the + * events. At the same time, make sure, having freq events does not change + * the rate of unthrottling as that would introduce bias. + */ +static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx, + int needs_unthr) { struct perf_event *event; struct hw_perf_event *hwc; - u64 interrupts, now; + u64 now, period = TICK_NSEC; s64 delta; + /* + * only need to iterate over all events iff: + * - context have events in frequency mode (needs freq adjust) + * - there are events to unthrottle on this cpu + */ + if (!(ctx->nr_freq || needs_unthr)) + return; + + raw_spin_lock(&ctx->lock); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->state != PERF_EVENT_STATE_ACTIVE) continue; @@ -2336,13 +2360,8 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) hwc = &event->hw; - interrupts = hwc->interrupts; - hwc->interrupts = 0; - - /* - * unthrottle events on the tick - */ - if (interrupts == MAX_INTERRUPTS) { + if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) { + hwc->interrupts = 0; perf_log_throttle(event, 1); event->pmu->start(event, 0); } @@ -2350,14 +2369,26 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) if (!event->attr.freq || !event->attr.sample_freq) continue; - event->pmu->read(event); + /* + * stop the event and update event->count + */ + event->pmu->stop(event, PERF_EF_UPDATE); + now = local64_read(&event->count); delta = now - hwc->freq_count_stamp; hwc->freq_count_stamp = now; + /* + * restart the event + * reload only if value has changed + */ if (delta > 0) perf_adjust_period(event, period, delta); + + event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0); } + + raw_spin_unlock(&ctx->lock); } /* @@ -2380,7 +2411,6 @@ static void rotate_ctx(struct perf_event_context *ctx) */ static void perf_rotate_context(struct perf_cpu_context *cpuctx) { - u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC; struct perf_event_context *ctx = NULL; int rotate = 0, remove = 1; @@ -2397,15 +2427,12 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) rotate = 1; } - perf_ctx_lock(cpuctx, cpuctx->task_ctx); - perf_pmu_disable(cpuctx->ctx.pmu); - perf_ctx_adjust_freq(&cpuctx->ctx, interval); - if (ctx) - perf_ctx_adjust_freq(ctx, interval); - if (!rotate) goto done; + perf_ctx_lock(cpuctx, cpuctx->task_ctx); + perf_pmu_disable(cpuctx->ctx.pmu); + cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (ctx) ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); @@ -2416,22 +2443,33 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) perf_event_sched_in(cpuctx, ctx, current); + perf_pmu_enable(cpuctx->ctx.pmu); + perf_ctx_unlock(cpuctx, cpuctx->task_ctx); done: if (remove) list_del_init(&cpuctx->rotation_list); - - perf_pmu_enable(cpuctx->ctx.pmu); - perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } void perf_event_task_tick(void) { struct list_head *head = &__get_cpu_var(rotation_list); struct perf_cpu_context *cpuctx, *tmp; + struct perf_event_context *ctx; + int throttled; WARN_ON(!irqs_disabled()); + __this_cpu_inc(perf_throttled_seq); + throttled = __this_cpu_xchg(perf_throttled_count, 0); + list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { + ctx = &cpuctx->ctx; + perf_adjust_freq_unthr_context(ctx, throttled); + + ctx = cpuctx->task_ctx; + if (ctx) + perf_adjust_freq_unthr_context(ctx, throttled); + if (cpuctx->jiffies_interval == 1 || !(jiffies % cpuctx->jiffies_interval)) perf_rotate_context(cpuctx); @@ -2448,7 +2486,7 @@ static int event_enable_on_exec(struct perf_event *event, if (event->state >= PERF_EVENT_STATE_INACTIVE) return 0; - __perf_event_mark_enabled(event, ctx); + __perf_event_mark_enabled(event); return 1; } @@ -2480,13 +2518,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) raw_spin_lock(&ctx->lock); task_ctx_sched_out(ctx); - list_for_each_entry(event, &ctx->pinned_groups, group_entry) { - ret = event_enable_on_exec(event, ctx); - if (ret) - enabled = 1; - } - - list_for_each_entry(event, &ctx->flexible_groups, group_entry) { + list_for_each_entry(event, &ctx->event_list, event_entry) { ret = event_enable_on_exec(event, ctx); if (ret) enabled = 1; @@ -2574,215 +2606,6 @@ static u64 perf_event_read(struct perf_event *event) } /* - * Callchain support - */ - -struct callchain_cpus_entries { - struct rcu_head rcu_head; - struct perf_callchain_entry *cpu_entries[0]; -}; - -static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); -static atomic_t nr_callchain_events; -static DEFINE_MUTEX(callchain_mutex); -struct callchain_cpus_entries *callchain_cpus_entries; - - -__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, - struct pt_regs *regs) -{ -} - -__weak void perf_callchain_user(struct perf_callchain_entry *entry, - struct pt_regs *regs) -{ -} - -static void release_callchain_buffers_rcu(struct rcu_head *head) -{ - struct callchain_cpus_entries *entries; - int cpu; - - entries = container_of(head, struct callchain_cpus_entries, rcu_head); - - for_each_possible_cpu(cpu) - kfree(entries->cpu_entries[cpu]); - - kfree(entries); -} - -static void release_callchain_buffers(void) -{ - struct callchain_cpus_entries *entries; - - entries = callchain_cpus_entries; - rcu_assign_pointer(callchain_cpus_entries, NULL); - call_rcu(&entries->rcu_head, release_callchain_buffers_rcu); -} - -static int alloc_callchain_buffers(void) -{ - int cpu; - int size; - struct callchain_cpus_entries *entries; - - /* - * We can't use the percpu allocation API for data that can be - * accessed from NMI. Use a temporary manual per cpu allocation - * until that gets sorted out. - */ - size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]); - - entries = kzalloc(size, GFP_KERNEL); - if (!entries) - return -ENOMEM; - - size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; - - for_each_possible_cpu(cpu) { - entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, - cpu_to_node(cpu)); - if (!entries->cpu_entries[cpu]) - goto fail; - } - - rcu_assign_pointer(callchain_cpus_entries, entries); - - return 0; - -fail: - for_each_possible_cpu(cpu) - kfree(entries->cpu_entries[cpu]); - kfree(entries); - - return -ENOMEM; -} - -static int get_callchain_buffers(void) -{ - int err = 0; - int count; - - mutex_lock(&callchain_mutex); - - count = atomic_inc_return(&nr_callchain_events); - if (WARN_ON_ONCE(count < 1)) { - err = -EINVAL; - goto exit; - } - - if (count > 1) { - /* If the allocation failed, give up */ - if (!callchain_cpus_entries) - err = -ENOMEM; - goto exit; - } - - err = alloc_callchain_buffers(); - if (err) - release_callchain_buffers(); -exit: - mutex_unlock(&callchain_mutex); - - return err; -} - -static void put_callchain_buffers(void) -{ - if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) { - release_callchain_buffers(); - mutex_unlock(&callchain_mutex); - } -} - -static int get_recursion_context(int *recursion) -{ - int rctx; - - if (in_nmi()) - rctx = 3; - else if (in_irq()) - rctx = 2; - else if (in_softirq()) - rctx = 1; - else - rctx = 0; - - if (recursion[rctx]) - return -1; - - recursion[rctx]++; - barrier(); - - return rctx; -} - -static inline void put_recursion_context(int *recursion, int rctx) -{ - barrier(); - recursion[rctx]--; -} - -static struct perf_callchain_entry *get_callchain_entry(int *rctx) -{ - int cpu; - struct callchain_cpus_entries *entries; - - *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); - if (*rctx == -1) - return NULL; - - entries = rcu_dereference(callchain_cpus_entries); - if (!entries) - return NULL; - - cpu = smp_processor_id(); - - return &entries->cpu_entries[cpu][*rctx]; -} - -static void -put_callchain_entry(int rctx) -{ - put_recursion_context(__get_cpu_var(callchain_recursion), rctx); -} - -static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - int rctx; - struct perf_callchain_entry *entry; - - - entry = get_callchain_entry(&rctx); - if (rctx == -1) - return NULL; - - if (!entry) - goto exit_put; - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_store(entry, PERF_CONTEXT_KERNEL); - perf_callchain_kernel(entry, regs); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - perf_callchain_store(entry, PERF_CONTEXT_USER); - perf_callchain_user(entry, regs); - } - -exit_put: - put_callchain_entry(rctx); - - return entry; -} - -/* * Initialize the perf_event context in a task_struct: */ static void __perf_event_init_context(struct perf_event_context *ctx) @@ -2946,7 +2769,7 @@ static void free_event(struct perf_event *event) if (!event->parent) { if (event->attach_state & PERF_ATTACH_TASK) - jump_label_dec(&perf_sched_events); + jump_label_dec_deferred(&perf_sched_events); if (event->attr.mmap || event->attr.mmap_data) atomic_dec(&nr_mmap_events); if (event->attr.comm) @@ -2957,7 +2780,7 @@ static void free_event(struct perf_event *event) put_callchain_buffers(); if (is_cgroup_event(event)) { atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); - jump_label_dec(&perf_sched_events); + jump_label_dec_deferred(&perf_sched_events); } } @@ -4707,6 +4530,7 @@ static int __perf_event_overflow(struct perf_event *event, { int events = atomic_read(&event->event_limit); struct hw_perf_event *hwc = &event->hw; + u64 seq; int ret = 0; /* @@ -4716,14 +4540,20 @@ static int __perf_event_overflow(struct perf_event *event, if (unlikely(!is_sampling_event(event))) return 0; - if (unlikely(hwc->interrupts >= max_samples_per_tick)) { - if (throttle) { + seq = __this_cpu_read(perf_throttled_seq); + if (seq != hwc->interrupts_seq) { + hwc->interrupts_seq = seq; + hwc->interrupts = 1; + } else { + hwc->interrupts++; + if (unlikely(throttle + && hwc->interrupts >= max_samples_per_tick)) { + __this_cpu_inc(perf_throttled_count); hwc->interrupts = MAX_INTERRUPTS; perf_log_throttle(event, 0); ret = 1; } - } else - hwc->interrupts++; + } if (event->attr.freq) { u64 now = perf_clock(); @@ -4820,7 +4650,6 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, struct hw_perf_event *hwc = &event->hw; int throttle = 0; - data->period = event->hw.last_period; if (!overflow) overflow = perf_swevent_set_period(event); @@ -4854,6 +4683,12 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, if (!is_sampling_event(event)) return; + if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) { + data->period = nr; + return perf_swevent_overflow(event, 1, data, regs); + } else + data->period = event->hw.last_period; + if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) return perf_swevent_overflow(event, 1, data, regs); @@ -5366,7 +5201,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) regs = get_irq_regs(); if (regs && !perf_exclude_event(event, regs)) { - if (!(event->attr.exclude_idle && current->pid == 0)) + if (!(event->attr.exclude_idle && is_idle_task(current))) if (perf_event_overflow(event, &data, regs)) ret = HRTIMER_NORESTART; } @@ -5981,7 +5816,7 @@ done: if (!event->parent) { if (event->attach_state & PERF_ATTACH_TASK) - jump_label_inc(&perf_sched_events); + jump_label_inc(&perf_sched_events.key); if (event->attr.mmap || event->attr.mmap_data) atomic_inc(&nr_mmap_events); if (event->attr.comm) @@ -6219,7 +6054,7 @@ SYSCALL_DEFINE5(perf_event_open, * - that may need work on context switch */ atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); - jump_label_inc(&perf_sched_events); + jump_label_inc(&perf_sched_events.key); } /* @@ -7065,6 +6900,9 @@ void __init perf_event_init(void) ret = init_hw_breakpoint(); WARN(ret, "hw_breakpoint initialization failed with: %d", ret); + + /* do not patch jump label more than once per second */ + jump_label_rate_limit(&perf_sched_events, HZ); } static int __init perf_event_sysfs_init(void) @@ -7131,10 +6969,13 @@ static int __perf_cgroup_move(void *info) return 0; } -static void -perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task) +static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct cgroup_taskset *tset) { - task_function_call(task, __perf_cgroup_move, task); + struct task_struct *task; + + cgroup_taskset_for_each(task, cgrp, tset) + task_function_call(task, __perf_cgroup_move, task); } static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, @@ -7148,7 +6989,7 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, if (!(task->flags & PF_EXITING)) return; - perf_cgroup_attach_task(cgrp, task); + task_function_call(task, __perf_cgroup_move, task); } struct cgroup_subsys perf_subsys = { @@ -7157,6 +6998,6 @@ struct cgroup_subsys perf_subsys = { .create = perf_cgroup_create, .destroy = perf_cgroup_destroy, .exit = perf_cgroup_exit, - .attach_task = perf_cgroup_attach_task, + .attach = perf_cgroup_attach, }; #endif /* CONFIG_CGROUP_PERF */ |