diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit.c | 4 | ||||
-rw-r--r-- | kernel/cgroup.c | 10 | ||||
-rw-r--r-- | kernel/cgroup_freezer.c | 116 | ||||
-rw-r--r-- | kernel/context_tracking.c | 2 | ||||
-rw-r--r-- | kernel/hrtimer.c | 30 | ||||
-rw-r--r-- | kernel/irq/irqdesc.c | 7 | ||||
-rw-r--r-- | kernel/irq/manage.c | 17 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 2 | ||||
-rw-r--r-- | kernel/module.c | 6 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 2 | ||||
-rw-r--r-- | kernel/power/suspend.c | 3 | ||||
-rw-r--r-- | kernel/printk/printk.c | 4 | ||||
-rw-r--r-- | kernel/sched/core.c | 10 | ||||
-rw-r--r-- | kernel/softirq.c | 9 | ||||
-rw-r--r-- | kernel/timer.c | 2 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 27 | ||||
-rw-r--r-- | kernel/trace/trace_events_trigger.c | 2 | ||||
-rw-r--r-- | kernel/tracepoint.c | 4 | ||||
-rw-r--r-- | kernel/workqueue.c | 36 |
19 files changed, 156 insertions, 137 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 7c2893602d0..47845c57eb1 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -643,13 +643,13 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) if ((task_active_pid_ns(current) != &init_pid_ns)) return -EPERM; - if (!capable(CAP_AUDIT_CONTROL)) + if (!netlink_capable(skb, CAP_AUDIT_CONTROL)) err = -EPERM; break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: - if (!capable(CAP_AUDIT_WRITE)) + if (!netlink_capable(skb, CAP_AUDIT_WRITE)) err = -EPERM; break; default: /* bad msg */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9fcdaa705b6..3f1ca934a23 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -348,7 +348,7 @@ struct cgrp_cset_link { * reference-counted, to improve performance when child cgroups * haven't been created. */ -static struct css_set init_css_set = { +struct css_set init_css_set = { .refcount = ATOMIC_INIT(1), .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), .tasks = LIST_HEAD_INIT(init_css_set.tasks), @@ -1495,7 +1495,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, */ if (!use_task_css_set_links) cgroup_enable_task_cg_lists(); -retry: + mutex_lock(&cgroup_tree_mutex); mutex_lock(&cgroup_mutex); @@ -1503,7 +1503,7 @@ retry: ret = parse_cgroupfs_options(data, &opts); if (ret) goto out_unlock; - +retry: /* look for a matching existing root */ if (!opts.subsys_mask && !opts.none && !opts.name) { cgrp_dfl_root_visible = true; @@ -1562,9 +1562,9 @@ retry: if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_tree_mutex); - kfree(opts.release_agent); - kfree(opts.name); msleep(10); + mutex_lock(&cgroup_tree_mutex); + mutex_lock(&cgroup_mutex); goto retry; } diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 2bc4a225644..345628c78b5 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -21,6 +21,7 @@ #include <linux/uaccess.h> #include <linux/freezer.h> #include <linux/seq_file.h> +#include <linux/mutex.h> /* * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is @@ -42,9 +43,10 @@ enum freezer_state_flags { struct freezer { struct cgroup_subsys_state css; unsigned int state; - spinlock_t lock; }; +static DEFINE_MUTEX(freezer_mutex); + static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) { return css ? container_of(css, struct freezer, css) : NULL; @@ -93,7 +95,6 @@ freezer_css_alloc(struct cgroup_subsys_state *parent_css) if (!freezer) return ERR_PTR(-ENOMEM); - spin_lock_init(&freezer->lock); return &freezer->css; } @@ -110,14 +111,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css) struct freezer *freezer = css_freezer(css); struct freezer *parent = parent_freezer(freezer); - /* - * The following double locking and freezing state inheritance - * guarantee that @cgroup can never escape ancestors' freezing - * states. See css_for_each_descendant_pre() for details. - */ - if (parent) - spin_lock_irq(&parent->lock); - spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING); + mutex_lock(&freezer_mutex); freezer->state |= CGROUP_FREEZER_ONLINE; @@ -126,10 +120,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css) atomic_inc(&system_freezing_cnt); } - spin_unlock(&freezer->lock); - if (parent) - spin_unlock_irq(&parent->lock); - + mutex_unlock(&freezer_mutex); return 0; } @@ -144,14 +135,14 @@ static void freezer_css_offline(struct cgroup_subsys_state *css) { struct freezer *freezer = css_freezer(css); - spin_lock_irq(&freezer->lock); + mutex_lock(&freezer_mutex); if (freezer->state & CGROUP_FREEZING) atomic_dec(&system_freezing_cnt); freezer->state = 0; - spin_unlock_irq(&freezer->lock); + mutex_unlock(&freezer_mutex); } static void freezer_css_free(struct cgroup_subsys_state *css) @@ -175,7 +166,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, struct task_struct *task; bool clear_frozen = false; - spin_lock_irq(&freezer->lock); + mutex_lock(&freezer_mutex); /* * Make the new tasks conform to the current state of @new_css. @@ -197,21 +188,13 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, } } - spin_unlock_irq(&freezer->lock); - - /* - * Propagate FROZEN clearing upwards. We may race with - * update_if_frozen(), but as long as both work bottom-up, either - * update_if_frozen() sees child's FROZEN cleared or we clear the - * parent's FROZEN later. No parent w/ !FROZEN children can be - * left FROZEN. - */ + /* propagate FROZEN clearing upwards */ while (clear_frozen && (freezer = parent_freezer(freezer))) { - spin_lock_irq(&freezer->lock); freezer->state &= ~CGROUP_FROZEN; clear_frozen = freezer->state & CGROUP_FREEZING; - spin_unlock_irq(&freezer->lock); } + + mutex_unlock(&freezer_mutex); } /** @@ -228,9 +211,6 @@ static void freezer_fork(struct task_struct *task) { struct freezer *freezer; - rcu_read_lock(); - freezer = task_freezer(task); - /* * The root cgroup is non-freezable, so we can skip locking the * freezer. This is safe regardless of race with task migration. @@ -238,24 +218,18 @@ static void freezer_fork(struct task_struct *task) * to do. If we lost and root is the new cgroup, noop is still the * right thing to do. */ - if (!parent_freezer(freezer)) - goto out; + if (task_css_is_root(task, freezer_cgrp_id)) + return; - /* - * Grab @freezer->lock and freeze @task after verifying @task still - * belongs to @freezer and it's freezing. The former is for the - * case where we have raced against task migration and lost and - * @task is already in a different cgroup which may not be frozen. - * This isn't strictly necessary as freeze_task() is allowed to be - * called spuriously but let's do it anyway for, if nothing else, - * documentation. - */ - spin_lock_irq(&freezer->lock); - if (freezer == task_freezer(task) && (freezer->state & CGROUP_FREEZING)) + mutex_lock(&freezer_mutex); + rcu_read_lock(); + + freezer = task_freezer(task); + if (freezer->state & CGROUP_FREEZING) freeze_task(task); - spin_unlock_irq(&freezer->lock); -out: + rcu_read_unlock(); + mutex_unlock(&freezer_mutex); } /** @@ -281,22 +255,24 @@ static void update_if_frozen(struct cgroup_subsys_state *css) struct css_task_iter it; struct task_struct *task; - WARN_ON_ONCE(!rcu_read_lock_held()); - - spin_lock_irq(&freezer->lock); + lockdep_assert_held(&freezer_mutex); if (!(freezer->state & CGROUP_FREEZING) || (freezer->state & CGROUP_FROZEN)) - goto out_unlock; + return; /* are all (live) children frozen? */ + rcu_read_lock(); css_for_each_child(pos, css) { struct freezer *child = css_freezer(pos); if ((child->state & CGROUP_FREEZER_ONLINE) && - !(child->state & CGROUP_FROZEN)) - goto out_unlock; + !(child->state & CGROUP_FROZEN)) { + rcu_read_unlock(); + return; + } } + rcu_read_unlock(); /* are all tasks frozen? */ css_task_iter_start(css, &it); @@ -317,21 +293,29 @@ static void update_if_frozen(struct cgroup_subsys_state *css) freezer->state |= CGROUP_FROZEN; out_iter_end: css_task_iter_end(&it); -out_unlock: - spin_unlock_irq(&freezer->lock); } static int freezer_read(struct seq_file *m, void *v) { struct cgroup_subsys_state *css = seq_css(m), *pos; + mutex_lock(&freezer_mutex); rcu_read_lock(); /* update states bottom-up */ - css_for_each_descendant_post(pos, css) + css_for_each_descendant_post(pos, css) { + if (!css_tryget(pos)) + continue; + rcu_read_unlock(); + update_if_frozen(pos); + rcu_read_lock(); + css_put(pos); + } + rcu_read_unlock(); + mutex_unlock(&freezer_mutex); seq_puts(m, freezer_state_strs(css_freezer(css)->state)); seq_putc(m, '\n'); @@ -373,7 +357,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze, unsigned int state) { /* also synchronizes against task migration, see freezer_attach() */ - lockdep_assert_held(&freezer->lock); + lockdep_assert_held(&freezer_mutex); if (!(freezer->state & CGROUP_FREEZER_ONLINE)) return; @@ -414,31 +398,29 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) * descendant will try to inherit its parent's FREEZING state as * CGROUP_FREEZING_PARENT. */ + mutex_lock(&freezer_mutex); rcu_read_lock(); css_for_each_descendant_pre(pos, &freezer->css) { struct freezer *pos_f = css_freezer(pos); struct freezer *parent = parent_freezer(pos_f); - spin_lock_irq(&pos_f->lock); + if (!css_tryget(pos)) + continue; + rcu_read_unlock(); - if (pos_f == freezer) { + if (pos_f == freezer) freezer_apply_state(pos_f, freeze, CGROUP_FREEZING_SELF); - } else { - /* - * Our update to @parent->state is already visible - * which is all we need. No need to lock @parent. - * For more info on synchronization, see - * freezer_post_create(). - */ + else freezer_apply_state(pos_f, parent->state & CGROUP_FREEZING, CGROUP_FREEZING_PARENT); - } - spin_unlock_irq(&pos_f->lock); + rcu_read_lock(); + css_put(pos); } rcu_read_unlock(); + mutex_unlock(&freezer_mutex); } static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 6cb20d2e7ee..019d4500844 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -120,7 +120,7 @@ void context_tracking_user_enter(void) * instead of preempt_schedule() to exit user context if needed before * calling the scheduler. */ -asmlinkage void __sched notrace preempt_schedule_context(void) +asmlinkage __visible void __sched notrace preempt_schedule_context(void) { enum ctx_state prev_ctx; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d55092ceee2..e0501fe7140 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -234,6 +234,11 @@ again: goto again; } timer->base = new_base; + } else { + if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { + cpu = this_cpu; + goto again; + } } return new_base; } @@ -569,6 +574,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) cpu_base->expires_next.tv64 = expires_next.tv64; + /* + * If a hang was detected in the last timer interrupt then we + * leave the hang delay active in the hardware. We want the + * system to make progress. That also prevents the following + * scenario: + * T1 expires 50ms from now + * T2 expires 5s from now + * + * T1 is removed, so this code is called and would reprogram + * the hardware to 5s from now. Any hrtimer_start after that + * will not reprogram the hardware due to hang_detected being + * set. So we'd effectivly block all timers until the T2 event + * fires. + */ + if (cpu_base->hang_detected) + return; + if (cpu_base->expires_next.tv64 != KTIME_MAX) tick_program_event(cpu_base->expires_next, 1); } @@ -968,11 +990,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, /* Remove an active timer from the queue: */ ret = remove_hrtimer(timer, base); - /* Switch the timer base, if necessary: */ - new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); - if (mode & HRTIMER_MODE_REL) { - tim = ktime_add_safe(tim, new_base->get_time()); + tim = ktime_add_safe(tim, base->get_time()); /* * CONFIG_TIME_LOW_RES is a temporary way for architectures * to signal that they simply return xtime in @@ -987,6 +1006,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, hrtimer_set_expires_range_ns(timer, tim, delta_ns); + /* Switch the timer base, if necessary: */ + new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); + timer_stats_hrtimer_set_start_info(timer); leftmost = enqueue_hrtimer(timer, new_base); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index a7174617616..bb07f2928f4 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -363,6 +363,13 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, if (from > irq) return -EINVAL; from = irq; + } else { + /* + * For interrupts which are freely allocated the + * architecture can force a lower bound to the @from + * argument. x86 uses this to exclude the GSI space. + */ + from = arch_dynirq_lower_bound(from); } mutex_lock(&sparse_irq_lock); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 2486a4c1a71..d34131ca372 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -180,7 +180,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_chip *chip = irq_data_get_irq_chip(data); int ret; - ret = chip->irq_set_affinity(data, mask, false); + ret = chip->irq_set_affinity(data, mask, force); switch (ret) { case IRQ_SET_MASK_OK: cpumask_copy(data->affinity, mask); @@ -192,7 +192,8 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, return ret; } -int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) +int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, + bool force) { struct irq_chip *chip = irq_data_get_irq_chip(data); struct irq_desc *desc = irq_data_to_desc(data); @@ -202,7 +203,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) return -EINVAL; if (irq_can_move_pcntxt(data)) { - ret = irq_do_set_affinity(data, mask, false); + ret = irq_do_set_affinity(data, mask, force); } else { irqd_set_move_pending(data); irq_copy_pending(desc, mask); @@ -217,13 +218,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) return ret; } -/** - * irq_set_affinity - Set the irq affinity of a given irq - * @irq: Interrupt to set affinity - * @mask: cpumask - * - */ -int irq_set_affinity(unsigned int irq, const struct cpumask *mask) +int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -233,7 +228,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask) return -EINVAL; raw_spin_lock_irqsave(&desc->lock, flags); - ret = __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask); + ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); raw_spin_unlock_irqrestore(&desc->lock, flags); return ret; } diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index b0e9467922e..d24e4339b46 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4188,7 +4188,7 @@ void debug_show_held_locks(struct task_struct *task) } EXPORT_SYMBOL_GPL(debug_show_held_locks); -asmlinkage void lockdep_sys_exit(void) +asmlinkage __visible void lockdep_sys_exit(void) { struct task_struct *curr = current; diff --git a/kernel/module.c b/kernel/module.c index 11869408f79..079c4615607 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -815,9 +815,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, return -EFAULT; name[MODULE_NAME_LEN-1] = '\0'; - if (!(flags & O_NONBLOCK)) - pr_warn("waiting module removal not supported: please upgrade\n"); - if (mutex_lock_interruptible(&module_mutex) != 0) return -EINTR; @@ -3271,6 +3268,9 @@ static int load_module(struct load_info *info, const char __user *uargs, dynamic_debug_setup(info->debug, info->num_debug); + /* Ftrace init must be called in the MODULE_STATE_UNFORMED state */ + ftrace_module_init(mod); + /* Finally it's fully formed, ready to start executing. */ err = complete_formation(mod, info); if (err) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 18fb7a2fb14..1ea328aafdc 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1586,7 +1586,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, return -ENOMEM; } -asmlinkage int swsusp_save(void) +asmlinkage __visible int swsusp_save(void) { unsigned int nr_pages, nr_highmem; diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c3ad9cafe93..8233cd4047d 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/console.h> #include <linux/cpu.h> +#include <linux/cpuidle.h> #include <linux/syscalls.h> #include <linux/gfp.h> #include <linux/io.h> @@ -53,7 +54,9 @@ static void freeze_begin(void) static void freeze_enter(void) { + cpuidle_resume(); wait_event(suspend_freeze_wait_head, suspend_freeze_wake); + cpuidle_pause(); } void freeze_wake(void) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a45b5096229..7228258b85e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1674,7 +1674,7 @@ EXPORT_SYMBOL(printk_emit); * * See the vsnprintf() documentation for format string extensions over C99. */ -asmlinkage int printk(const char *fmt, ...) +asmlinkage __visible int printk(const char *fmt, ...) { va_list args; int r; @@ -1737,7 +1737,7 @@ void early_vprintk(const char *fmt, va_list ap) } } -asmlinkage void early_printk(const char *fmt, ...) +asmlinkage __visible void early_printk(const char *fmt, ...) { va_list ap; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4b82622b625..092e511605e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2223,7 +2223,7 @@ static inline void post_schedule(struct rq *rq) * schedule_tail - first thing a freshly forked thread must call. * @prev: the thread we just switched away from. */ -asmlinkage void schedule_tail(struct task_struct *prev) +asmlinkage __visible void schedule_tail(struct task_struct *prev) __releases(rq->lock) { struct rq *rq = this_rq(); @@ -2778,7 +2778,7 @@ static inline void sched_submit_work(struct task_struct *tsk) blk_schedule_flush_plug(tsk); } -asmlinkage void __sched schedule(void) +asmlinkage __visible void __sched schedule(void) { struct task_struct *tsk = current; @@ -2788,7 +2788,7 @@ asmlinkage void __sched schedule(void) EXPORT_SYMBOL(schedule); #ifdef CONFIG_CONTEXT_TRACKING -asmlinkage void __sched schedule_user(void) +asmlinkage __visible void __sched schedule_user(void) { /* * If we come here after a random call to set_need_resched(), @@ -2820,7 +2820,7 @@ void __sched schedule_preempt_disabled(void) * off of preempt_enable. Kernel preemptions off return from interrupt * occur there and call schedule directly. */ -asmlinkage void __sched notrace preempt_schedule(void) +asmlinkage __visible void __sched notrace preempt_schedule(void) { /* * If there is a non-zero preempt_count or interrupts are disabled, @@ -2850,7 +2850,7 @@ EXPORT_SYMBOL(preempt_schedule); * Note, that this is called and return with irqs disabled. This will * protect us against recursive calling from irq. */ -asmlinkage void __sched preempt_schedule_irq(void) +asmlinkage __visible void __sched preempt_schedule_irq(void) { enum ctx_state prev_state; diff --git a/kernel/softirq.c b/kernel/softirq.c index b50990a5bea..92f24f5e8d5 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -223,7 +223,7 @@ static inline bool lockdep_softirq_start(void) { return false; } static inline void lockdep_softirq_end(bool in_hardirq) { } #endif -asmlinkage void __do_softirq(void) +asmlinkage __visible void __do_softirq(void) { unsigned long end = jiffies + MAX_SOFTIRQ_TIME; unsigned long old_flags = current->flags; @@ -299,7 +299,7 @@ restart: tsk_restore_flags(current, old_flags, PF_MEMALLOC); } -asmlinkage void do_softirq(void) +asmlinkage __visible void do_softirq(void) { __u32 pending; unsigned long flags; @@ -779,3 +779,8 @@ int __init __weak arch_early_irq_init(void) { return 0; } + +unsigned int __weak arch_dynirq_lower_bound(unsigned int from) +{ + return from; +} diff --git a/kernel/timer.c b/kernel/timer.c index 87bd529879c..3bb01a323b2 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -838,7 +838,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) bit = find_last_bit(&mask, BITS_PER_LONG); - mask = (1 << bit) - 1; + mask = (1UL << bit) - 1; expires_limit = expires_limit & ~(mask); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1fd4b947921..4a54a25afa2 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4330,16 +4330,11 @@ static void ftrace_init_module(struct module *mod, ftrace_process_locs(mod, start, end); } -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) +void ftrace_module_init(struct module *mod) { - struct module *mod = data; - - if (val == MODULE_STATE_COMING) - ftrace_init_module(mod, mod->ftrace_callsites, - mod->ftrace_callsites + - mod->num_ftrace_callsites); - return 0; + ftrace_init_module(mod, mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); } static int ftrace_module_notify_exit(struct notifier_block *self, @@ -4353,11 +4348,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, return 0; } #else -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) -{ - return 0; -} static int ftrace_module_notify_exit(struct notifier_block *self, unsigned long val, void *data) { @@ -4365,11 +4355,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, } #endif /* CONFIG_MODULES */ -struct notifier_block ftrace_module_enter_nb = { - .notifier_call = ftrace_module_notify_enter, - .priority = INT_MAX, /* Run before anything that can use kprobes */ -}; - struct notifier_block ftrace_module_exit_nb = { .notifier_call = ftrace_module_notify_exit, .priority = INT_MIN, /* Run after anything that can remove kprobes */ @@ -4403,10 +4388,6 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); - ret = register_module_notifier(&ftrace_module_enter_nb); - if (ret) - pr_warning("Failed to register trace ftrace module enter notifier\n"); - ret = register_module_notifier(&ftrace_module_exit_nb); if (ret) pr_warning("Failed to register trace ftrace module exit notifier\n"); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 925f537f07d..4747b476a03 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -77,7 +77,7 @@ event_triggers_call(struct ftrace_event_file *file, void *rec) data->ops->func(data); continue; } - filter = rcu_dereference(data->filter); + filter = rcu_dereference_sched(data->filter); if (filter && !filter_match_preds(filter, rec)) continue; if (data->cmd_ops->post_trigger) { diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index ac5b23cf721..6620e5837ce 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -188,7 +188,6 @@ static int tracepoint_add_func(struct tracepoint *tp, WARN_ON_ONCE(1); return PTR_ERR(old); } - release_probes(old); /* * rcu_assign_pointer has a smp_wmb() which makes sure that the new @@ -200,6 +199,7 @@ static int tracepoint_add_func(struct tracepoint *tp, rcu_assign_pointer(tp->funcs, tp_funcs); if (!static_key_enabled(&tp->key)) static_key_slow_inc(&tp->key); + release_probes(old); return 0; } @@ -221,7 +221,6 @@ static int tracepoint_remove_func(struct tracepoint *tp, WARN_ON_ONCE(1); return PTR_ERR(old); } - release_probes(old); if (!tp_funcs) { /* Removed last function */ @@ -232,6 +231,7 @@ static int tracepoint_remove_func(struct tracepoint *tp, static_key_slow_dec(&tp->key); } rcu_assign_pointer(tp->funcs, tp_funcs); + release_probes(old); return 0; } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c30c01b32ec..a4bab46cd38 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1916,6 +1916,12 @@ static void send_mayday(struct work_struct *work) /* mayday mayday mayday */ if (list_empty(&pwq->mayday_node)) { + /* + * If @pwq is for an unbound wq, its base ref may be put at + * any time due to an attribute change. Pin @pwq until the + * rescuer is done with it. + */ + get_pwq(pwq); list_add_tail(&pwq->mayday_node, &wq->maydays); wake_up_process(wq->rescuer->task); } @@ -2398,6 +2404,7 @@ static int rescuer_thread(void *__rescuer) struct worker *rescuer = __rescuer; struct workqueue_struct *wq = rescuer->rescue_wq; struct list_head *scheduled = &rescuer->scheduled; + bool should_stop; set_user_nice(current, RESCUER_NICE_LEVEL); @@ -2409,11 +2416,15 @@ static int rescuer_thread(void *__rescuer) repeat: set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) { - __set_current_state(TASK_RUNNING); - rescuer->task->flags &= ~PF_WQ_WORKER; - return 0; - } + /* + * By the time the rescuer is requested to stop, the workqueue + * shouldn't have any work pending, but @wq->maydays may still have + * pwq(s) queued. This can happen by non-rescuer workers consuming + * all the work items before the rescuer got to them. Go through + * @wq->maydays processing before acting on should_stop so that the + * list is always empty on exit. + */ + should_stop = kthread_should_stop(); /* see whether any pwq is asking for help */ spin_lock_irq(&wq_mayday_lock); @@ -2445,6 +2456,12 @@ repeat: process_scheduled_works(rescuer); /* + * Put the reference grabbed by send_mayday(). @pool won't + * go away while we're holding its lock. + */ + put_pwq(pwq); + + /* * Leave this pool. If keep_working() is %true, notify a * regular worker; otherwise, we end up with 0 concurrency * and stalling the execution. @@ -2459,6 +2476,12 @@ repeat: spin_unlock_irq(&wq_mayday_lock); + if (should_stop) { + __set_current_state(TASK_RUNNING); + rescuer->task->flags &= ~PF_WQ_WORKER; + return 0; + } + /* rescuers should never participate in concurrency management */ WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); schedule(); @@ -4100,7 +4123,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, if (!pwq) { pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", wq->name); - goto out_unlock; + mutex_lock(&wq->mutex); + goto use_dfl_pwq; } /* |