diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 1 | ||||
-rw-r--r-- | kernel/cpuset.c | 106 | ||||
-rw-r--r-- | kernel/irq/chip.c | 35 | ||||
-rw-r--r-- | kernel/irq/manage.c | 22 | ||||
-rw-r--r-- | kernel/kprobes.c | 3 | ||||
-rw-r--r-- | kernel/kthread.c | 2 | ||||
-rw-r--r-- | kernel/lockdep.c | 9 | ||||
-rw-r--r-- | kernel/perf_event.c | 84 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 10 | ||||
-rw-r--r-- | kernel/rcupdate.c | 23 | ||||
-rw-r--r-- | kernel/resource.c | 44 | ||||
-rw-r--r-- | kernel/sched.c | 12 | ||||
-rw-r--r-- | kernel/slow-work.c | 2 | ||||
-rw-r--r-- | kernel/slow-work.h | 8 | ||||
-rw-r--r-- | kernel/softlockup.c | 4 | ||||
-rw-r--r-- | kernel/time/tick-oneshot.c | 52 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 3 | ||||
-rw-r--r-- | kernel/time/timer_list.c | 3 | ||||
-rw-r--r-- | kernel/timer.c | 1 | ||||
-rw-r--r-- | kernel/trace/Makefile | 2 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 14 | ||||
-rw-r--r-- | kernel/trace/trace_event_perf.c (renamed from kernel/trace/trace_event_profile.c) | 50 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 29 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 72 |
25 files changed, 385 insertions, 208 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ef909a32975..e2769e13980 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -27,7 +27,6 @@ */ #include <linux/cgroup.h> -#include <linux/module.h> #include <linux/ctype.h> #include <linux/errno.h> #include <linux/fs.h> diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ba401fab459..d10946748ec 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -920,9 +920,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * call to guarantee_online_mems(), as we know no one is changing * our task's cpuset. * - * Hold callback_mutex around the two modifications of our tasks - * mems_allowed to synchronize with cpuset_mems_allowed(). - * * While the mm_struct we are migrating is typically from some * other task, the task_struct mems_allowed that we are hacking * is for our current task, which must allocate new pages for that @@ -973,15 +970,20 @@ static void cpuset_change_nodemask(struct task_struct *p, struct cpuset *cs; int migrate; const nodemask_t *oldmem = scan->data; - nodemask_t newmems; + NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL); + + if (!newmems) + return; cs = cgroup_cs(scan->cg); - guarantee_online_mems(cs, &newmems); + guarantee_online_mems(cs, newmems); task_lock(p); - cpuset_change_task_nodemask(p, &newmems); + cpuset_change_task_nodemask(p, newmems); task_unlock(p); + NODEMASK_FREE(newmems); + mm = get_task_mm(p); if (!mm) return; @@ -1051,16 +1053,21 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { - nodemask_t oldmem; + NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL); int retval; struct ptr_heap heap; + if (!oldmem) + return -ENOMEM; + /* * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY]; * it's read-only */ - if (cs == &top_cpuset) - return -EACCES; + if (cs == &top_cpuset) { + retval = -EACCES; + goto done; + } /* * An empty mems_allowed is ok iff there are no tasks in the cpuset. @@ -1076,11 +1083,13 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, goto done; if (!nodes_subset(trialcs->mems_allowed, - node_states[N_HIGH_MEMORY])) - return -EINVAL; + node_states[N_HIGH_MEMORY])) { + retval = -EINVAL; + goto done; + } } - oldmem = cs->mems_allowed; - if (nodes_equal(oldmem, trialcs->mems_allowed)) { + *oldmem = cs->mems_allowed; + if (nodes_equal(*oldmem, trialcs->mems_allowed)) { retval = 0; /* Too easy - nothing to do */ goto done; } @@ -1096,10 +1105,11 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, cs->mems_allowed = trialcs->mems_allowed; mutex_unlock(&callback_mutex); - update_tasks_nodemask(cs, &oldmem, &heap); + update_tasks_nodemask(cs, oldmem, &heap); heap_free(&heap); done: + NODEMASK_FREE(oldmem); return retval; } @@ -1384,40 +1394,47 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, struct cgroup *oldcont, struct task_struct *tsk, bool threadgroup) { - nodemask_t from, to; struct mm_struct *mm; struct cpuset *cs = cgroup_cs(cont); struct cpuset *oldcs = cgroup_cs(oldcont); + NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL); + NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL); + + if (from == NULL || to == NULL) + goto alloc_fail; if (cs == &top_cpuset) { cpumask_copy(cpus_attach, cpu_possible_mask); - to = node_possible_map; } else { guarantee_online_cpus(cs, cpus_attach); - guarantee_online_mems(cs, &to); } + guarantee_online_mems(cs, to); /* do per-task migration stuff possibly for each in the threadgroup */ - cpuset_attach_task(tsk, &to, cs); + cpuset_attach_task(tsk, to, cs); if (threadgroup) { struct task_struct *c; rcu_read_lock(); list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { - cpuset_attach_task(c, &to, cs); + cpuset_attach_task(c, to, cs); } rcu_read_unlock(); } /* change mm; only needs to be done once even if threadgroup */ - from = oldcs->mems_allowed; - to = cs->mems_allowed; + *from = oldcs->mems_allowed; + *to = cs->mems_allowed; mm = get_task_mm(tsk); if (mm) { - mpol_rebind_mm(mm, &to); + mpol_rebind_mm(mm, to); if (is_memory_migrate(cs)) - cpuset_migrate_mm(mm, &from, &to); + cpuset_migrate_mm(mm, from, to); mmput(mm); } + +alloc_fail: + NODEMASK_FREE(from); + NODEMASK_FREE(to); } /* The various types of files and directories in a cpuset file system */ @@ -1562,13 +1579,21 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) { - nodemask_t mask; + NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL); + int retval; + + if (mask == NULL) + return -ENOMEM; mutex_lock(&callback_mutex); - mask = cs->mems_allowed; + *mask = cs->mems_allowed; mutex_unlock(&callback_mutex); - return nodelist_scnprintf(page, PAGE_SIZE, mask); + retval = nodelist_scnprintf(page, PAGE_SIZE, *mask); + + NODEMASK_FREE(mask); + + return retval; } static ssize_t cpuset_common_file_read(struct cgroup *cont, @@ -1997,7 +2022,10 @@ static void scan_for_empty_cpusets(struct cpuset *root) struct cpuset *cp; /* scans cpusets being updated */ struct cpuset *child; /* scans child cpusets of cp */ struct cgroup *cont; - nodemask_t oldmems; + NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); + + if (oldmems == NULL) + return; list_add_tail((struct list_head *)&root->stack_list, &queue); @@ -2014,7 +2042,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) continue; - oldmems = cp->mems_allowed; + *oldmems = cp->mems_allowed; /* Remove offline cpus and mems from this cpuset. */ mutex_lock(&callback_mutex); @@ -2030,9 +2058,10 @@ static void scan_for_empty_cpusets(struct cpuset *root) remove_tasks_in_empty_cpuset(cp); else { update_tasks_cpumask(cp, NULL); - update_tasks_nodemask(cp, &oldmems, NULL); + update_tasks_nodemask(cp, oldmems, NULL); } } + NODEMASK_FREE(oldmems); } /* @@ -2090,20 +2119,33 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, static int cpuset_track_online_nodes(struct notifier_block *self, unsigned long action, void *arg) { + NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); + + if (oldmems == NULL) + return NOTIFY_DONE; + cgroup_lock(); switch (action) { case MEM_ONLINE: - case MEM_OFFLINE: + *oldmems = top_cpuset.mems_allowed; mutex_lock(&callback_mutex); top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; mutex_unlock(&callback_mutex); - if (action == MEM_OFFLINE) - scan_for_empty_cpusets(&top_cpuset); + update_tasks_nodemask(&top_cpuset, oldmems, NULL); + break; + case MEM_OFFLINE: + /* + * needn't update top_cpuset.mems_allowed explicitly because + * scan_for_empty_cpusets() will update it. + */ + scan_for_empty_cpusets(&top_cpuset); break; default: break; } cgroup_unlock(); + + NODEMASK_FREE(oldmems); return NOTIFY_OK; } #endif diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 42ec11b2af8..b7091d5ca2f 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -359,6 +359,23 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq) if (desc->chip->ack) desc->chip->ack(irq); } + desc->status |= IRQ_MASKED; +} + +static inline void mask_irq(struct irq_desc *desc, int irq) +{ + if (desc->chip->mask) { + desc->chip->mask(irq); + desc->status |= IRQ_MASKED; + } +} + +static inline void unmask_irq(struct irq_desc *desc, int irq) +{ + if (desc->chip->unmask) { + desc->chip->unmask(irq); + desc->status &= ~IRQ_MASKED; + } } /* @@ -484,10 +501,8 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) raw_spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; - if (unlikely(desc->status & IRQ_ONESHOT)) - desc->status |= IRQ_MASKED; - else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) - desc->chip->unmask(irq); + if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT))) + unmask_irq(desc, irq); out_unlock: raw_spin_unlock(&desc->lock); } @@ -524,8 +539,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) action = desc->action; if (unlikely(!action || (desc->status & IRQ_DISABLED))) { desc->status |= IRQ_PENDING; - if (desc->chip->mask) - desc->chip->mask(irq); + mask_irq(desc, irq); goto out; } @@ -593,7 +607,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) irqreturn_t action_ret; if (unlikely(!action)) { - desc->chip->mask(irq); + mask_irq(desc, irq); goto out_unlock; } @@ -605,8 +619,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) if (unlikely((desc->status & (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_MASKED))) { - desc->chip->unmask(irq); - desc->status &= ~IRQ_MASKED; + unmask_irq(desc, irq); } desc->status &= ~IRQ_PENDING; @@ -716,7 +729,7 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, __set_irq_handler(irq, handle, 0, name); } -void __init set_irq_noprobe(unsigned int irq) +void set_irq_noprobe(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -731,7 +744,7 @@ void __init set_irq_noprobe(unsigned int irq) raw_spin_unlock_irqrestore(&desc->lock, flags); } -void __init set_irq_probe(unsigned int irq) +void set_irq_probe(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index eb6078ca60c..398fda155f6 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -382,6 +382,7 @@ int can_request_irq(unsigned int irq, unsigned long irqflags) { struct irq_desc *desc = irq_to_desc(irq); struct irqaction *action; + unsigned long flags; if (!desc) return 0; @@ -389,11 +390,14 @@ int can_request_irq(unsigned int irq, unsigned long irqflags) if (desc->status & IRQ_NOREQUEST) return 0; + raw_spin_lock_irqsave(&desc->lock, flags); action = desc->action; if (action) if (irqflags & action->flags & IRQF_SHARED) action = NULL; + raw_spin_unlock_irqrestore(&desc->lock, flags); + return !action; } @@ -483,8 +487,26 @@ static int irq_wait_for_interrupt(struct irqaction *action) */ static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) { +again: chip_bus_lock(irq, desc); raw_spin_lock_irq(&desc->lock); + + /* + * Implausible though it may be we need to protect us against + * the following scenario: + * + * The thread is faster done than the hard interrupt handler + * on the other CPU. If we unmask the irq line then the + * interrupt can come in again and masks the line, leaves due + * to IRQ_INPROGRESS and the irq line is masked forever. + */ + if (unlikely(desc->status & IRQ_INPROGRESS)) { + raw_spin_unlock_irq(&desc->lock); + chip_bus_sync_unlock(irq, desc); + cpu_relax(); + goto again; + } + if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { desc->status &= ~IRQ_MASKED; desc->chip->unmask(irq); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index fa034d29cf7..0ed46f3e51e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -259,7 +259,8 @@ static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c, struct kprobe_insn_page *kip; list_for_each_entry(kip, &c->pages, list) { - long idx = ((long)slot - (long)kip->insns) / c->insn_size; + long idx = ((long)slot - (long)kip->insns) / + (c->insn_size * sizeof(kprobe_opcode_t)); if (idx >= 0 && idx < slots_per_page(c)) { WARN_ON(kip->slot_used[idx] != SLOT_USED); if (dirty) { diff --git a/kernel/kthread.c b/kernel/kthread.c index 82ed0ea1519..83911c78017 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -219,7 +219,7 @@ int kthreadd(void *unused) set_task_comm(tsk, "kthreadd"); ignore_signals(tsk); set_cpus_allowed_ptr(tsk, cpu_all_mask); - set_mems_allowed(node_possible_map); + set_mems_allowed(node_states[N_HIGH_MEMORY]); current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 681bc2e1e18..c927a549db2 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3211,8 +3211,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, { unsigned long flags; - trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); - if (unlikely(current->lockdep_recursion)) return; @@ -3220,6 +3218,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, check_flags(flags); current->lockdep_recursion = 1; + trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); __lock_acquire(lock, subclass, trylock, read, check, irqs_disabled_flags(flags), nest_lock, ip, 0); current->lockdep_recursion = 0; @@ -3232,14 +3231,13 @@ void lock_release(struct lockdep_map *lock, int nested, { unsigned long flags; - trace_lock_release(lock, nested, ip); - if (unlikely(current->lockdep_recursion)) return; raw_local_irq_save(flags); check_flags(flags); current->lockdep_recursion = 1; + trace_lock_release(lock, nested, ip); __lock_release(lock, nested, ip); current->lockdep_recursion = 0; raw_local_irq_restore(flags); @@ -3413,8 +3411,6 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; - trace_lock_contended(lock, ip); - if (unlikely(!lock_stat)) return; @@ -3424,6 +3420,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) raw_local_irq_save(flags); check_flags(flags); current->lockdep_recursion = 1; + trace_lock_contended(lock, ip); __lock_contended(lock, ip); current->lockdep_recursion = 0; raw_local_irq_restore(flags); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 4393b9e7374..574ee58a304 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -81,10 +81,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); } -void __weak hw_perf_event_setup(int cpu) { barrier(); } -void __weak hw_perf_event_setup_online(int cpu) { barrier(); } -void __weak hw_perf_event_setup_offline(int cpu) { barrier(); } - int __weak hw_perf_group_sched_in(struct perf_event *group_leader, struct perf_cpu_context *cpuctx, @@ -97,25 +93,15 @@ void __weak perf_event_print_debug(void) { } static DEFINE_PER_CPU(int, perf_disable_count); -void __perf_disable(void) -{ - __get_cpu_var(perf_disable_count)++; -} - -bool __perf_enable(void) -{ - return !--__get_cpu_var(perf_disable_count); -} - void perf_disable(void) { - __perf_disable(); - hw_perf_disable(); + if (!__get_cpu_var(perf_disable_count)++) + hw_perf_disable(); } void perf_enable(void) { - if (__perf_enable()) + if (!--__get_cpu_var(perf_disable_count)) hw_perf_enable(); } @@ -1538,12 +1524,15 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) */ if (interrupts == MAX_INTERRUPTS) { perf_log_throttle(event, 1); + perf_disable(); event->pmu->unthrottle(event); + perf_enable(); } if (!event->attr.freq || !event->attr.sample_freq) continue; + perf_disable(); event->pmu->read(event); now = atomic64_read(&event->count); delta = now - hwc->freq_count_stamp; @@ -1551,6 +1540,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) if (delta > 0) perf_adjust_period(event, TICK_NSEC, delta); + perf_enable(); } raw_spin_unlock(&ctx->lock); } @@ -1560,9 +1550,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) */ static void rotate_ctx(struct perf_event_context *ctx) { - if (!ctx->nr_events) - return; - raw_spin_lock(&ctx->lock); /* Rotate the first entry last of non-pinned groups */ @@ -1575,19 +1562,28 @@ void perf_event_task_tick(struct task_struct *curr) { struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; + int rotate = 0; if (!atomic_read(&nr_events)) return; cpuctx = &__get_cpu_var(perf_cpu_context); - ctx = curr->perf_event_ctxp; + if (cpuctx->ctx.nr_events && + cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) + rotate = 1; - perf_disable(); + ctx = curr->perf_event_ctxp; + if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active) + rotate = 1; perf_ctx_adjust_freq(&cpuctx->ctx); if (ctx) perf_ctx_adjust_freq(ctx); + if (!rotate) + return; + + perf_disable(); cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (ctx) task_ctx_sched_out(ctx, EVENT_FLEXIBLE); @@ -1599,7 +1595,6 @@ void perf_event_task_tick(struct task_struct *curr) cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) task_ctx_sched_in(curr, EVENT_FLEXIBLE); - perf_enable(); } @@ -2791,6 +2786,13 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return NULL; } +#ifdef CONFIG_EVENT_TRACING +__weak +void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) +{ +} +#endif + /* * Output */ @@ -4318,9 +4320,8 @@ static const struct pmu perf_ops_task_clock = { #ifdef CONFIG_EVENT_TRACING void perf_tp_event(int event_id, u64 addr, u64 count, void *record, - int entry_size) + int entry_size, struct pt_regs *regs) { - struct pt_regs *regs = get_irq_regs(); struct perf_sample_data data; struct perf_raw_record raw = { .size = entry_size, @@ -4330,12 +4331,9 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, perf_sample_data_init(&data, addr); data.raw = &raw; - if (!regs) - regs = task_pt_regs(current); - /* Trace events already protected against recursion */ do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, - &data, regs); + &data, regs); } EXPORT_SYMBOL_GPL(perf_tp_event); @@ -4351,7 +4349,7 @@ static int perf_tp_event_match(struct perf_event *event, static void tp_perf_event_destroy(struct perf_event *event) { - ftrace_profile_disable(event->attr.config); + perf_trace_disable(event->attr.config); } static const struct pmu *tp_perf_event_init(struct perf_event *event) @@ -4365,7 +4363,7 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); - if (ftrace_profile_enable(event->attr.config)) + if (perf_trace_enable(event->attr.config)) return NULL; event->destroy = tp_perf_event_destroy; @@ -5372,18 +5370,26 @@ int perf_event_init_task(struct task_struct *child) return ret; } +static void __init perf_event_init_all_cpus(void) +{ + int cpu; + struct perf_cpu_context *cpuctx; + + for_each_possible_cpu(cpu) { + cpuctx = &per_cpu(perf_cpu_context, cpu); + __perf_event_init_context(&cpuctx->ctx, NULL); + } +} + static void __cpuinit perf_event_init_cpu(int cpu) { struct perf_cpu_context *cpuctx; cpuctx = &per_cpu(perf_cpu_context, cpu); - __perf_event_init_context(&cpuctx->ctx, NULL); spin_lock(&perf_resource_lock); cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; spin_unlock(&perf_resource_lock); - - hw_perf_event_setup(cpu); } #ifdef CONFIG_HOTPLUG_CPU @@ -5423,20 +5429,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) perf_event_init_cpu(cpu); break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - hw_perf_event_setup_online(cpu); - break; - case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: perf_event_exit_cpu(cpu); break; - case CPU_DEAD: - hw_perf_event_setup_offline(cpu); - break; - default: break; } @@ -5454,6 +5451,7 @@ static struct notifier_block __cpuinitdata perf_cpu_nb = { void __init perf_event_init(void) { + perf_event_init_all_cpus(); perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 1a22dfd42df..bc7704b3a44 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1061,9 +1061,9 @@ static void check_thread_timers(struct task_struct *tsk, } } -static void stop_process_timers(struct task_struct *tsk) +static void stop_process_timers(struct signal_struct *sig) { - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct thread_group_cputimer *cputimer = &sig->cputimer; unsigned long flags; if (!cputimer->running) @@ -1072,6 +1072,10 @@ static void stop_process_timers(struct task_struct *tsk) spin_lock_irqsave(&cputimer->lock, flags); cputimer->running = 0; spin_unlock_irqrestore(&cputimer->lock, flags); + + sig->cputime_expires.prof_exp = cputime_zero; + sig->cputime_expires.virt_exp = cputime_zero; + sig->cputime_expires.sched_exp = 0; } static u32 onecputick; @@ -1133,7 +1137,7 @@ static void check_process_timers(struct task_struct *tsk, list_empty(&timers[CPUCLOCK_VIRT]) && cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && list_empty(&timers[CPUCLOCK_SCHED])) { - stop_process_timers(tsk); + stop_process_timers(sig); return; } diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index f1125c1a632..63fe2543398 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -45,6 +45,7 @@ #include <linux/mutex.h> #include <linux/module.h> #include <linux/kernel_stat.h> +#include <linux/hardirq.h> #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; @@ -66,6 +67,28 @@ EXPORT_SYMBOL_GPL(rcu_sched_lock_map); int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +/** + * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? + * + * Check for bottom half being disabled, which covers both the + * CONFIG_PROVE_RCU and not cases. Note that if someone uses + * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) + * will show the situation. + * + * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. + */ +int rcu_read_lock_bh_held(void) +{ + if (!debug_lockdep_rcu_enabled()) + return 1; + return in_softirq(); +} +EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); + +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + /* * This function is invoked towards the end of the scheduler's initialization * process. Before this is called, the idle task might contain diff --git a/kernel/resource.c b/kernel/resource.c index 2d5be5d9bf5..9c358e26353 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -219,19 +219,34 @@ void release_child_resources(struct resource *r) } /** - * request_resource - request and reserve an I/O or memory resource + * request_resource_conflict - request and reserve an I/O or memory resource * @root: root resource descriptor * @new: resource descriptor desired by caller * - * Returns 0 for success, negative error code on error. + * Returns 0 for success, conflict resource on error. */ -int request_resource(struct resource *root, struct resource *new) +struct resource *request_resource_conflict(struct resource *root, struct resource *new) { struct resource *conflict; write_lock(&resource_lock); conflict = __request_resource(root, new); write_unlock(&resource_lock); + return conflict; +} + +/** + * request_resource - request and reserve an I/O or memory resource + * @root: root resource descriptor + * @new: resource descriptor desired by caller + * + * Returns 0 for success, negative error code on error. + */ +int request_resource(struct resource *root, struct resource *new) +{ + struct resource *conflict; + + conflict = request_resource_conflict(root, new); return conflict ? -EBUSY : 0; } @@ -474,25 +489,40 @@ static struct resource * __insert_resource(struct resource *parent, struct resou } /** - * insert_resource - Inserts a resource in the resource tree + * insert_resource_conflict - Inserts resource in the resource tree * @parent: parent of the new resource * @new: new resource to insert * - * Returns 0 on success, -EBUSY if the resource can't be inserted. + * Returns 0 on success, conflict resource if the resource can't be inserted. * - * This function is equivalent to request_resource when no conflict + * This function is equivalent to request_resource_conflict when no conflict * happens. If a conflict happens, and the conflicting resources * entirely fit within the range of the new resource, then the new * resource is inserted and the conflicting resources become children of * the new resource. */ -int insert_resource(struct resource *parent, struct resource *new) +struct resource *insert_resource_conflict(struct resource *parent, struct resource *new) { struct resource *conflict; write_lock(&resource_lock); conflict = __insert_resource(parent, new); write_unlock(&resource_lock); + return conflict; +} + +/** + * insert_resource - Inserts a resource in the resource tree + * @parent: parent of the new resource + * @new: new resource to insert + * + * Returns 0 on success, -EBUSY if the resource can't be inserted. + */ +int insert_resource(struct resource *parent, struct resource *new) +{ + struct resource *conflict; + + conflict = insert_resource_conflict(parent, new); return conflict ? -EBUSY : 0; } diff --git a/kernel/sched.c b/kernel/sched.c index 9ab3cd7858d..49d2fa7b687 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2650,7 +2650,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) { unsigned long flags; struct rq *rq; - int cpu = get_cpu(); + int cpu __maybe_unused = get_cpu(); #ifdef CONFIG_SMP /* @@ -4902,7 +4902,9 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, int ret; cpumask_var_t mask; - if (len < cpumask_size()) + if (len < nr_cpu_ids) + return -EINVAL; + if (len & (sizeof(unsigned long)-1)) return -EINVAL; if (!alloc_cpumask_var(&mask, GFP_KERNEL)) @@ -4910,10 +4912,12 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, ret = sched_getaffinity(pid, mask); if (ret == 0) { - if (copy_to_user(user_mask_ptr, mask, cpumask_size())) + size_t retlen = min_t(size_t, len, cpumask_size()); + + if (copy_to_user(user_mask_ptr, mask, retlen)) ret = -EFAULT; else - ret = cpumask_size(); + ret = retlen; } free_cpumask_var(mask); diff --git a/kernel/slow-work.c b/kernel/slow-work.c index 7494bbf5a27..7d3f4fa9ef4 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -637,7 +637,7 @@ int delayed_slow_work_enqueue(struct delayed_slow_work *dwork, goto cancelled; /* the timer holds a reference whilst it is pending */ - ret = work->ops->get_ref(work); + ret = slow_work_get_ref(work); if (ret < 0) goto cant_get_ref; diff --git a/kernel/slow-work.h b/kernel/slow-work.h index 321f3c59d73..a29ebd1ef41 100644 --- a/kernel/slow-work.h +++ b/kernel/slow-work.h @@ -43,28 +43,28 @@ extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *); */ static inline void slow_work_set_thread_pid(int id, pid_t pid) { -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG slow_work_pids[id] = pid; #endif } static inline void slow_work_mark_time(struct slow_work *work) { -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG work->mark = CURRENT_TIME; #endif } static inline void slow_work_begin_exec(int id, struct slow_work *work) { -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG slow_work_execs[id] = work; #endif } static inline void slow_work_end_exec(int id, struct slow_work *work) { -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG write_lock(&slow_work_execs_lock); slow_work_execs[id] = NULL; write_unlock(&slow_work_execs_lock); diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 0d4c7898ab8..4b493f67dcb 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -155,11 +155,11 @@ void softlockup_tick(void) * Wake up the high-prio watchdog task twice per * threshold timespan. */ - if (now > touch_ts + softlockup_thresh/2) + if (time_after(now - softlockup_thresh/2, touch_ts)) wake_up_process(per_cpu(softlockup_watchdog, this_cpu)); /* Warn about unreasonable delays: */ - if (now <= (touch_ts + softlockup_thresh)) + if (time_before_eq(now - softlockup_thresh, touch_ts)) return; per_cpu(softlockup_print_ts, this_cpu) = touch_ts; diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 0a8a213016f..aada0e52680 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -22,6 +22,29 @@ #include "tick-internal.h" +/* Limit min_delta to a jiffie */ +#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) + +static int tick_increase_min_delta(struct clock_event_device *dev) +{ + /* Nothing to do if we already reached the limit */ + if (dev->min_delta_ns >= MIN_DELTA_LIMIT) + return -ETIME; + + if (dev->min_delta_ns < 5000) + dev->min_delta_ns = 5000; + else + dev->min_delta_ns += dev->min_delta_ns >> 1; + + if (dev->min_delta_ns > MIN_DELTA_LIMIT) + dev->min_delta_ns = MIN_DELTA_LIMIT; + + printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n", + dev->name ? dev->name : "?", + (unsigned long long) dev->min_delta_ns); + return 0; +} + /** * tick_program_event internal worker function */ @@ -37,23 +60,28 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires, if (!ret || !force) return ret; + dev->retries++; /* - * We tried 2 times to program the device with the given - * min_delta_ns. If that's not working then we double it + * We tried 3 times to program the device with the given + * min_delta_ns. If that's not working then we increase it * and emit a warning. */ if (++i > 2) { /* Increase the min. delta and try again */ - if (!dev->min_delta_ns) - dev->min_delta_ns = 5000; - else - dev->min_delta_ns += dev->min_delta_ns >> 1; - - printk(KERN_WARNING - "CE: %s increasing min_delta_ns to %llu nsec\n", - dev->name ? dev->name : "?", - (unsigned long long) dev->min_delta_ns << 1); - + if (tick_increase_min_delta(dev)) { + /* + * Get out of the loop if min_delta_ns + * hit the limit already. That's + * better than staying here forever. + * + * We clear next_event so we have a + * chance that the box survives. + */ + printk(KERN_WARNING + "CE: Reprogramming failure. Giving up\n"); + dev->next_event.tv64 = KTIME_MAX; + return -ETIME; + } i = 0; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 16736379a9c..39f6177fafa 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -818,7 +818,8 @@ void update_wall_time(void) shift = min(shift, maxshift); while (offset >= timekeeper.cycle_interval) { offset = logarithmic_accumulation(offset, shift); - shift--; + if(offset < timekeeper.cycle_interval<<shift) + shift--; } /* correct the clock when NTP error is too big */ diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index bdfb8dd1050..1a4a7dd7877 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -228,6 +228,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) SEQ_printf(m, " event_handler: "); print_name_offset(m, dev->event_handler); SEQ_printf(m, "\n"); + SEQ_printf(m, " retries: %lu\n", dev->retries); } static void timer_list_show_tickdevices(struct seq_file *m) @@ -257,7 +258,7 @@ static int timer_list_show(struct seq_file *m, void *v) u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Timer List Version: v0.5\n"); + SEQ_printf(m, "Timer List Version: v0.6\n"); SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); diff --git a/kernel/timer.c b/kernel/timer.c index c61a7949387..fc965eae0e8 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -880,6 +880,7 @@ int try_to_del_timer_sync(struct timer_list *timer) if (base->running_timer == timer) goto out; + timer_stats_timer_clear_start_info(timer); ret = 0; if (timer_pending(timer)) { detach_timer(timer, 1); diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index d00c6fe23f5..78edc649003 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events.o obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o ifeq ($(CONFIG_PERF_EVENTS),y) -obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o +obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 05a9f83b881..d1187ef20ca 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -207,6 +207,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on); #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ +#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) +# define RB_FORCE_8BYTE_ALIGNMENT 0 +# define RB_ARCH_ALIGNMENT RB_ALIGNMENT +#else +# define RB_FORCE_8BYTE_ALIGNMENT 1 +# define RB_ARCH_ALIGNMENT 8U +#endif + /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX @@ -1547,7 +1555,7 @@ rb_update_event(struct ring_buffer_event *event, case 0: length -= RB_EVNT_HDR_SIZE; - if (length > RB_MAX_SMALL_DATA) + if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) event->array[0] = length; else event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); @@ -1722,11 +1730,11 @@ static unsigned rb_calculate_event_length(unsigned length) if (!length) length = 1; - if (length > RB_MAX_SMALL_DATA) + if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) length += sizeof(event.array[0]); length += RB_EVNT_HDR_SIZE; - length = ALIGN(length, RB_ALIGNMENT); + length = ALIGN(length, RB_ARCH_ALIGNMENT); return length; } diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_perf.c index c1cc3ab633d..81f691eb3a3 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_perf.c @@ -1,32 +1,36 @@ /* - * trace event based perf counter profiling + * trace event based perf event profiling/tracing * * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com> - * + * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com> */ #include <linux/module.h> #include <linux/kprobes.h> #include "trace.h" +DEFINE_PER_CPU(struct pt_regs, perf_trace_regs); +EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs); + +EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); static char *perf_trace_buf; static char *perf_trace_buf_nmi; -typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; +typedef typeof(char [PERF_MAX_TRACE_SIZE]) perf_trace_t ; /* Count the events in use (per event id, not per instance) */ -static int total_profile_count; +static int total_ref_count; -static int ftrace_profile_enable_event(struct ftrace_event_call *event) +static int perf_trace_event_enable(struct ftrace_event_call *event) { char *buf; int ret = -ENOMEM; - if (event->profile_count++ > 0) + if (event->perf_refcount++ > 0) return 0; - if (!total_profile_count) { + if (!total_ref_count) { buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail_buf; @@ -40,35 +44,35 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) rcu_assign_pointer(perf_trace_buf_nmi, buf); } - ret = event->profile_enable(event); + ret = event->perf_event_enable(event); if (!ret) { - total_profile_count++; + total_ref_count++; return 0; } fail_buf_nmi: - if (!total_profile_count) { + if (!total_ref_count) { free_percpu(perf_trace_buf_nmi); free_percpu(perf_trace_buf); perf_trace_buf_nmi = NULL; perf_trace_buf = NULL; } fail_buf: - event->profile_count--; + event->perf_refcount--; return ret; } -int ftrace_profile_enable(int event_id) +int perf_trace_enable(int event_id) { struct ftrace_event_call *event; int ret = -EINVAL; mutex_lock(&event_mutex); list_for_each_entry(event, &ftrace_events, list) { - if (event->id == event_id && event->profile_enable && + if (event->id == event_id && event->perf_event_enable && try_module_get(event->mod)) { - ret = ftrace_profile_enable_event(event); + ret = perf_trace_event_enable(event); break; } } @@ -77,16 +81,16 @@ int ftrace_profile_enable(int event_id) return ret; } -static void ftrace_profile_disable_event(struct ftrace_event_call *event) +static void perf_trace_event_disable(struct ftrace_event_call *event) { char *buf, *nmi_buf; - if (--event->profile_count > 0) + if (--event->perf_refcount > 0) return; - event->profile_disable(event); + event->perf_event_disable(event); - if (!--total_profile_count) { + if (!--total_ref_count) { buf = perf_trace_buf; rcu_assign_pointer(perf_trace_buf, NULL); @@ -104,14 +108,14 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event) } } -void ftrace_profile_disable(int event_id) +void perf_trace_disable(int event_id) { struct ftrace_event_call *event; mutex_lock(&event_mutex); list_for_each_entry(event, &ftrace_events, list) { if (event->id == event_id) { - ftrace_profile_disable_event(event); + perf_trace_event_disable(event); module_put(event->mod); break; } @@ -119,8 +123,8 @@ void ftrace_profile_disable(int event_id) mutex_unlock(&event_mutex); } -__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, - int *rctxp, unsigned long *irq_flags) +__kprobes void *perf_trace_buf_prepare(int size, unsigned short type, + int *rctxp, unsigned long *irq_flags) { struct trace_entry *entry; char *trace_buf, *raw_data; @@ -161,4 +165,4 @@ err_recursion: local_irq_restore(*irq_flags); return NULL; } -EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare); +EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3f972ad98d0..beab8bf2f31 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -938,7 +938,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, trace_create_file("enable", 0644, call->dir, call, enable); - if (call->id && call->profile_enable) + if (call->id && call->perf_event_enable) trace_create_file("id", 0444, call->dir, call, id); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 505c92273b1..1251e367bae 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1214,7 +1214,7 @@ static int set_print_fmt(struct trace_probe *tp) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static __kprobes void kprobe_profile_func(struct kprobe *kp, +static __kprobes void kprobe_perf_func(struct kprobe *kp, struct pt_regs *regs) { struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); @@ -1227,11 +1227,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp, __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) return; - entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); + entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags); if (!entry) return; @@ -1240,11 +1240,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); + perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs); } /* Kretprobe profile handler */ -static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, +static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, struct pt_regs *regs) { struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); @@ -1257,11 +1257,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) return; - entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); + entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags); if (!entry) return; @@ -1271,10 +1271,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); + perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, + irq_flags, regs); } -static int probe_profile_enable(struct ftrace_event_call *call) +static int probe_perf_enable(struct ftrace_event_call *call) { struct trace_probe *tp = (struct trace_probe *)call->data; @@ -1286,7 +1287,7 @@ static int probe_profile_enable(struct ftrace_event_call *call) return enable_kprobe(&tp->rp.kp); } -static void probe_profile_disable(struct ftrace_event_call *call) +static void probe_perf_disable(struct ftrace_event_call *call) { struct trace_probe *tp = (struct trace_probe *)call->data; @@ -1311,7 +1312,7 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) kprobe_trace_func(kp, regs); #ifdef CONFIG_PERF_EVENTS if (tp->flags & TP_FLAG_PROFILE) - kprobe_profile_func(kp, regs); + kprobe_perf_func(kp, regs); #endif return 0; /* We don't tweek kernel, so just return 0 */ } @@ -1325,7 +1326,7 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) kretprobe_trace_func(ri, regs); #ifdef CONFIG_PERF_EVENTS if (tp->flags & TP_FLAG_PROFILE) - kretprobe_profile_func(ri, regs); + kretprobe_perf_func(ri, regs); #endif return 0; /* We don't tweek kernel, so just return 0 */ } @@ -1358,8 +1359,8 @@ static int register_probe_event(struct trace_probe *tp) call->unregfunc = probe_event_disable; #ifdef CONFIG_PERF_EVENTS - call->profile_enable = probe_profile_enable; - call->profile_disable = probe_profile_disable; + call->perf_event_enable = probe_perf_enable; + call->perf_event_disable = probe_perf_disable; #endif call->data = tp; ret = trace_add_event_call(call); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index cba47d7935c..33c2a5b769d 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -428,12 +428,12 @@ core_initcall(init_ftrace_syscalls); #ifdef CONFIG_PERF_EVENTS -static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); -static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); -static int sys_prof_refcount_enter; -static int sys_prof_refcount_exit; +static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls); +static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); +static int sys_perf_refcount_enter; +static int sys_perf_refcount_exit; -static void prof_syscall_enter(struct pt_regs *regs, long id) +static void perf_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; @@ -443,7 +443,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) int size; syscall_nr = syscall_get_nr(current, regs); - if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) + if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) return; sys_data = syscall_nr_to_meta(syscall_nr); @@ -455,11 +455,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, - "profile buffer not large enough")) + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, + "perf buffer not large enough")) return; - rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, + rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, sys_data->enter_event->id, &rctx, &flags); if (!rec) return; @@ -467,10 +467,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); + perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs); } -int prof_sysenter_enable(struct ftrace_event_call *call) +int perf_sysenter_enable(struct ftrace_event_call *call) { int ret = 0; int num; @@ -478,34 +478,34 @@ int prof_sysenter_enable(struct ftrace_event_call *call) num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); - if (!sys_prof_refcount_enter) - ret = register_trace_sys_enter(prof_syscall_enter); + if (!sys_perf_refcount_enter) + ret = register_trace_sys_enter(perf_syscall_enter); if (ret) { pr_info("event trace: Could not activate" "syscall entry trace point"); } else { - set_bit(num, enabled_prof_enter_syscalls); - sys_prof_refcount_enter++; + set_bit(num, enabled_perf_enter_syscalls); + sys_perf_refcount_enter++; } mutex_unlock(&syscall_trace_lock); return ret; } -void prof_sysenter_disable(struct ftrace_event_call *call) +void perf_sysenter_disable(struct ftrace_event_call *call) { int num; num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); - sys_prof_refcount_enter--; - clear_bit(num, enabled_prof_enter_syscalls); - if (!sys_prof_refcount_enter) - unregister_trace_sys_enter(prof_syscall_enter); + sys_perf_refcount_enter--; + clear_bit(num, enabled_perf_enter_syscalls); + if (!sys_perf_refcount_enter) + unregister_trace_sys_enter(perf_syscall_enter); mutex_unlock(&syscall_trace_lock); } -static void prof_syscall_exit(struct pt_regs *regs, long ret) +static void perf_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; @@ -515,7 +515,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) int size; syscall_nr = syscall_get_nr(current, regs); - if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) + if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) return; sys_data = syscall_nr_to_meta(syscall_nr); @@ -530,11 +530,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) * Impossible, but be paranoid with the future * How to put this check outside runtime? */ - if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, - "exit event has grown above profile buffer size")) + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, + "exit event has grown above perf buffer size")) return; - rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, + rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, sys_data->exit_event->id, &rctx, &flags); if (!rec) return; @@ -542,10 +542,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); + perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs); } -int prof_sysexit_enable(struct ftrace_event_call *call) +int perf_sysexit_enable(struct ftrace_event_call *call) { int ret = 0; int num; @@ -553,30 +553,30 @@ int prof_sysexit_enable(struct ftrace_event_call *call) num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); - if (!sys_prof_refcount_exit) - ret = register_trace_sys_exit(prof_syscall_exit); + if (!sys_perf_refcount_exit) + ret = register_trace_sys_exit(perf_syscall_exit); if (ret) { pr_info("event trace: Could not activate" "syscall exit trace point"); } else { - set_bit(num, enabled_prof_exit_syscalls); - sys_prof_refcount_exit++; + set_bit(num, enabled_perf_exit_syscalls); + sys_perf_refcount_exit++; } mutex_unlock(&syscall_trace_lock); return ret; } -void prof_sysexit_disable(struct ftrace_event_call *call) +void perf_sysexit_disable(struct ftrace_event_call *call) { int num; num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); - sys_prof_refcount_exit--; - clear_bit(num, enabled_prof_exit_syscalls); - if (!sys_prof_refcount_exit) - unregister_trace_sys_exit(prof_syscall_exit); + sys_perf_refcount_exit--; + clear_bit(num, enabled_perf_exit_syscalls); + if (!sys_perf_refcount_exit) + unregister_trace_sys_exit(perf_syscall_exit); mutex_unlock(&syscall_trace_lock); } |