diff options
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r-- | kernel/perf_event.c | 136 |
1 files changed, 65 insertions, 71 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index f309e8014c7..cb6c0d2af68 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -417,8 +417,8 @@ event_filter_match(struct perf_event *event) return event->cpu == -1 || event->cpu == smp_processor_id(); } -static int -__event_sched_out(struct perf_event *event, +static void +event_sched_out(struct perf_event *event, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx) { @@ -437,13 +437,14 @@ __event_sched_out(struct perf_event *event, } if (event->state != PERF_EVENT_STATE_ACTIVE) - return 0; + return; event->state = PERF_EVENT_STATE_INACTIVE; if (event->pending_disable) { event->pending_disable = 0; event->state = PERF_EVENT_STATE_OFF; } + event->tstamp_stopped = ctx->time; event->pmu->del(event, 0); event->oncpu = -1; @@ -452,19 +453,6 @@ __event_sched_out(struct perf_event *event, ctx->nr_active--; if (event->attr.exclusive || !cpuctx->active_oncpu) cpuctx->exclusive = 0; - return 1; -} - -static void -event_sched_out(struct perf_event *event, - struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx) -{ - int ret; - - ret = __event_sched_out(event, cpuctx, ctx); - if (ret) - event->tstamp_stopped = ctx->time; } static void @@ -664,7 +652,7 @@ retry: } static int -__event_sched_in(struct perf_event *event, +event_sched_in(struct perf_event *event, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx) { @@ -684,6 +672,10 @@ __event_sched_in(struct perf_event *event, return -EAGAIN; } + event->tstamp_running += ctx->time - event->tstamp_stopped; + + event->shadow_ctx_time = ctx->time - ctx->timestamp; + if (!is_software_event(event)) cpuctx->active_oncpu++; ctx->nr_active++; @@ -694,35 +686,6 @@ __event_sched_in(struct perf_event *event, return 0; } -static inline int -event_sched_in(struct perf_event *event, - struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx) -{ - int ret = __event_sched_in(event, cpuctx, ctx); - if (ret) - return ret; - event->tstamp_running += ctx->time - event->tstamp_stopped; - return 0; -} - -static void -group_commit_event_sched_in(struct perf_event *group_event, - struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx) -{ - struct perf_event *event; - u64 now = ctx->time; - - group_event->tstamp_running += now - group_event->tstamp_stopped; - /* - * Schedule in siblings as one group (if any): - */ - list_for_each_entry(event, &group_event->sibling_list, group_entry) { - event->tstamp_running += now - event->tstamp_stopped; - } -} - static int group_sched_in(struct perf_event *group_event, struct perf_cpu_context *cpuctx, @@ -730,19 +693,15 @@ group_sched_in(struct perf_event *group_event, { struct perf_event *event, *partial_group = NULL; struct pmu *pmu = group_event->pmu; + u64 now = ctx->time; + bool simulate = false; if (group_event->state == PERF_EVENT_STATE_OFF) return 0; pmu->start_txn(pmu); - /* - * use __event_sched_in() to delay updating tstamp_running - * until the transaction is committed. In case of failure - * we will keep an unmodified tstamp_running which is a - * requirement to get correct timing information - */ - if (__event_sched_in(group_event, cpuctx, ctx)) { + if (event_sched_in(group_event, cpuctx, ctx)) { pmu->cancel_txn(pmu); return -EAGAIN; } @@ -751,31 +710,42 @@ group_sched_in(struct perf_event *group_event, * Schedule in siblings as one group (if any): */ list_for_each_entry(event, &group_event->sibling_list, group_entry) { - if (__event_sched_in(event, cpuctx, ctx)) { + if (event_sched_in(event, cpuctx, ctx)) { partial_group = event; goto group_error; } } - if (!pmu->commit_txn(pmu)) { - /* commit tstamp_running */ - group_commit_event_sched_in(group_event, cpuctx, ctx); + if (!pmu->commit_txn(pmu)) return 0; - } + group_error: /* * Groups can be scheduled in as one unit only, so undo any * partial group before returning: + * The events up to the failed event are scheduled out normally, + * tstamp_stopped will be updated. * - * use __event_sched_out() to avoid updating tstamp_stopped - * because the event never actually ran + * The failed events and the remaining siblings need to have + * their timings updated as if they had gone thru event_sched_in() + * and event_sched_out(). This is required to get consistent timings + * across the group. This also takes care of the case where the group + * could never be scheduled by ensuring tstamp_stopped is set to mark + * the time the event was actually stopped, such that time delta + * calculation in update_event_times() is correct. */ list_for_each_entry(event, &group_event->sibling_list, group_entry) { if (event == partial_group) - break; - __event_sched_out(event, cpuctx, ctx); + simulate = true; + + if (simulate) { + event->tstamp_running += now - event->tstamp_stopped; + event->tstamp_stopped = now; + } else { + event_sched_out(event, cpuctx, ctx); + } } - __event_sched_out(group_event, cpuctx, ctx); + event_sched_out(group_event, cpuctx, ctx); pmu->cancel_txn(pmu); @@ -3428,7 +3398,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) } static void perf_output_read_one(struct perf_output_handle *handle, - struct perf_event *event) + struct perf_event *event, + u64 enabled, u64 running) { u64 read_format = event->attr.read_format; u64 values[4]; @@ -3436,11 +3407,11 @@ static void perf_output_read_one(struct perf_output_handle *handle, values[n++] = perf_event_count(event); if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = event->total_time_enabled + + values[n++] = enabled + atomic64_read(&event->child_total_time_enabled); } if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = event->total_time_running + + values[n++] = running + atomic64_read(&event->child_total_time_running); } if (read_format & PERF_FORMAT_ID) @@ -3453,7 +3424,8 @@ static void perf_output_read_one(struct perf_output_handle *handle, * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. */ static void perf_output_read_group(struct perf_output_handle *handle, - struct perf_event *event) + struct perf_event *event, + u64 enabled, u64 running) { struct perf_event *leader = event->group_leader, *sub; u64 read_format = event->attr.read_format; @@ -3463,10 +3435,10 @@ static void perf_output_read_group(struct perf_output_handle *handle, values[n++] = 1 + leader->nr_siblings; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - values[n++] = leader->total_time_enabled; + values[n++] = enabled; if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - values[n++] = leader->total_time_running; + values[n++] = running; if (leader != event) leader->pmu->read(leader); @@ -3491,13 +3463,35 @@ static void perf_output_read_group(struct perf_output_handle *handle, } } +#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ + PERF_FORMAT_TOTAL_TIME_RUNNING) + static void perf_output_read(struct perf_output_handle *handle, struct perf_event *event) { + u64 enabled = 0, running = 0, now, ctx_time; + u64 read_format = event->attr.read_format; + + /* + * compute total_time_enabled, total_time_running + * based on snapshot values taken when the event + * was last scheduled in. + * + * we cannot simply called update_context_time() + * because of locking issue as we are called in + * NMI context + */ + if (read_format & PERF_FORMAT_TOTAL_TIMES) { + now = perf_clock(); + ctx_time = event->shadow_ctx_time + now; + enabled = ctx_time - event->tstamp_enabled; + running = ctx_time - event->tstamp_running; + } + if (event->attr.read_format & PERF_FORMAT_GROUP) - perf_output_read_group(handle, event); + perf_output_read_group(handle, event, enabled, running); else - perf_output_read_one(handle, event); + perf_output_read_one(handle, event, enabled, running); } void perf_output_sample(struct perf_output_handle *handle, |