summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/events/core.c 39
-rw-r--r-- kernel/irq_work.c 6
-rw-r--r-- kernel/trace/trace_event_perf.c 22
3 files changed, 51 insertions, 16 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fa0b2d4ad83..661951ab8ae 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -231,11 +231,29 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
#define NR_ACCUMULATED_SAMPLES 128
static DEFINE_PER_CPU(u64, running_sample_length);
-void perf_sample_event_took(u64 sample_len_ns)
+static void perf_duration_warn(struct irq_work *w)
{
+ u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
u64 avg_local_sample_len;
u64 local_samples_len;
+
+ local_samples_len = __get_cpu_var(running_sample_length);
+ avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
+
+ printk_ratelimited(KERN_WARNING
+ "perf interrupt took too long (%lld > %lld), lowering "
+ "kernel.perf_event_max_sample_rate to %d\n",
+ avg_local_sample_len, allowed_ns >> 1,
+ sysctl_perf_event_sample_rate);
+}
+
+static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn);
+
+void perf_sample_event_took(u64 sample_len_ns)
+{
u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
+ u64 avg_local_sample_len;
+ u64 local_samples_len;
if (allowed_ns == 0)
return;
@@ -263,13 +281,14 @@ void perf_sample_event_took(u64 sample_len_ns)
sysctl_perf_event_sample_rate = max_samples_per_tick * HZ;
perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
- printk_ratelimited(KERN_WARNING
- "perf samples too long (%lld > %lld), lowering "
- "kernel.perf_event_max_sample_rate to %d\n",
- avg_local_sample_len, allowed_ns,
- sysctl_perf_event_sample_rate);
-
update_perf_cpu_limits();
+
+ if (!irq_work_queue(&perf_duration_work)) {
+ early_printk("perf interrupt took too long (%lld > %lld), lowering "
+ "kernel.perf_event_max_sample_rate to %d\n",
+ avg_local_sample_len, allowed_ns >> 1,
+ sysctl_perf_event_sample_rate);
+ }
}
static atomic64_t perf_event_id;
@@ -1714,7 +1733,7 @@ group_sched_in(struct perf_event *group_event,
struct perf_event_context *ctx)
{
struct perf_event *event, *partial_group = NULL;
- struct pmu *pmu = group_event->pmu;
+ struct pmu *pmu = ctx->pmu;
u64 now = ctx->time;
bool simulate = false;
@@ -2563,8 +2582,6 @@ static void perf_branch_stack_sched_in(struct task_struct *prev,
if (cpuctx->ctx.nr_branch_stack > 0
&& pmu->flush_branch_stack) {
- pmu = cpuctx->ctx.pmu;
-
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(pmu);
@@ -6294,7 +6311,7 @@ static int perf_event_idx_default(struct perf_event *event)
* Ensures all contexts with the same task_ctx_nr have the same
* pmu_cpu_context too.
*/
-static void *find_pmu_context(int ctxn)
+static struct perf_cpu_context __percpu *find_pmu_context(int ctxn)
{
struct pmu *pmu;
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 55fcce6065c..a82170e2fa7 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -61,11 +61,11 @@ void __weak arch_irq_work_raise(void)
*
* Can be re-enqueued while the callback is still in progress.
*/
-void irq_work_queue(struct irq_work *work)
+bool irq_work_queue(struct irq_work *work)
{
/* Only queue if not already pending */
if (!irq_work_claim(work))
- return;
+ return false;
/* Queue the entry and raise the IPI if needed. */
preempt_disable();
@@ -83,6 +83,8 @@ void irq_work_queue(struct irq_work *work)
}
preempt_enable();
+
+ return true;
}
EXPORT_SYMBOL_GPL(irq_work_queue);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index e854f420e03..c894614de14 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -31,9 +31,25 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
}
/* The ftrace function trace is allowed only for root. */
- if (ftrace_event_is_function(tp_event) &&
- perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ if (ftrace_event_is_function(tp_event)) {
+ if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /*
+ * We don't allow user space callchains for function trace
+ * event, due to issues with page faults while tracing page
+ * fault handler and its overall trickiness nature.
+ */
+ if (!p_event->attr.exclude_callchain_user)
+ return -EINVAL;
+
+ /*
+ * Same reason to disable user stack dump as for user space
+ * callchains above.
+ */
+ if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER)
+ return -EINVAL;
+ }
/* No tracing, just counting, so no obvious leak */
if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))