diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/events/core.c | 39 | ||||
-rw-r--r-- | kernel/irq_work.c | 6 | ||||
-rw-r--r-- | kernel/trace/trace_event_perf.c | 22 |
3 files changed, 51 insertions, 16 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index fa0b2d4ad83..661951ab8ae 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -231,11 +231,29 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, #define NR_ACCUMULATED_SAMPLES 128 static DEFINE_PER_CPU(u64, running_sample_length); -void perf_sample_event_took(u64 sample_len_ns) +static void perf_duration_warn(struct irq_work *w) { + u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns); u64 avg_local_sample_len; u64 local_samples_len; + + local_samples_len = __get_cpu_var(running_sample_length); + avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES; + + printk_ratelimited(KERN_WARNING + "perf interrupt took too long (%lld > %lld), lowering " + "kernel.perf_event_max_sample_rate to %d\n", + avg_local_sample_len, allowed_ns >> 1, + sysctl_perf_event_sample_rate); +} + +static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn); + +void perf_sample_event_took(u64 sample_len_ns) +{ u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns); + u64 avg_local_sample_len; + u64 local_samples_len; if (allowed_ns == 0) return; @@ -263,13 +281,14 @@ void perf_sample_event_took(u64 sample_len_ns) sysctl_perf_event_sample_rate = max_samples_per_tick * HZ; perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; - printk_ratelimited(KERN_WARNING - "perf samples too long (%lld > %lld), lowering " - "kernel.perf_event_max_sample_rate to %d\n", - avg_local_sample_len, allowed_ns, - sysctl_perf_event_sample_rate); - update_perf_cpu_limits(); + + if (!irq_work_queue(&perf_duration_work)) { + early_printk("perf interrupt took too long (%lld > %lld), lowering " + "kernel.perf_event_max_sample_rate to %d\n", + avg_local_sample_len, allowed_ns >> 1, + sysctl_perf_event_sample_rate); + } } static atomic64_t perf_event_id; @@ -1714,7 +1733,7 @@ group_sched_in(struct perf_event *group_event, struct perf_event_context *ctx) { struct perf_event *event, *partial_group = NULL; - struct pmu *pmu = group_event->pmu; + struct pmu *pmu = ctx->pmu; u64 now = ctx->time; bool simulate = false; @@ -2563,8 +2582,6 @@ static void perf_branch_stack_sched_in(struct task_struct *prev, if (cpuctx->ctx.nr_branch_stack > 0 && pmu->flush_branch_stack) { - pmu = cpuctx->ctx.pmu; - perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(pmu); @@ -6294,7 +6311,7 @@ static int perf_event_idx_default(struct perf_event *event) * Ensures all contexts with the same task_ctx_nr have the same * pmu_cpu_context too. */ -static void *find_pmu_context(int ctxn) +static struct perf_cpu_context __percpu *find_pmu_context(int ctxn) { struct pmu *pmu; diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 55fcce6065c..a82170e2fa7 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -61,11 +61,11 @@ void __weak arch_irq_work_raise(void) * * Can be re-enqueued while the callback is still in progress. */ -void irq_work_queue(struct irq_work *work) +bool irq_work_queue(struct irq_work *work) { /* Only queue if not already pending */ if (!irq_work_claim(work)) - return; + return false; /* Queue the entry and raise the IPI if needed. */ preempt_disable(); @@ -83,6 +83,8 @@ void irq_work_queue(struct irq_work *work) } preempt_enable(); + + return true; } EXPORT_SYMBOL_GPL(irq_work_queue); diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index e854f420e03..c894614de14 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -31,9 +31,25 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event, } /* The ftrace function trace is allowed only for root. */ - if (ftrace_event_is_function(tp_event) && - perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return -EPERM; + if (ftrace_event_is_function(tp_event)) { + if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* + * We don't allow user space callchains for function trace + * event, due to issues with page faults while tracing page + * fault handler and its overall trickiness nature. + */ + if (!p_event->attr.exclude_callchain_user) + return -EINVAL; + + /* + * Same reason to disable user stack dump as for user space + * callchains above. + */ + if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER) + return -EINVAL; + } /* No tracing, just counting, so no obvious leak */ if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) |