summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/async.c27
-rw-r--r--kernel/events/core.c20
-rw-r--r--kernel/printk.c9
-rw-r--r--kernel/ptrace.c72
-rw-r--r--kernel/rcutree_plugin.h13
-rw-r--r--kernel/sched/core.c3
-rw-r--r--kernel/sched/debug.c4
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/sched/rt.c2
-rw-r--r--kernel/signal.c19
-rw-r--r--kernel/smp.c13
-rw-r--r--kernel/trace/ftrace.c2
12 files changed, 134 insertions, 52 deletions
diff --git a/kernel/async.c b/kernel/async.c
index a1d585c351d..6f34904a0b5 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -86,18 +86,27 @@ static atomic_t entry_count;
*/
static async_cookie_t __lowest_in_progress(struct async_domain *running)
{
+ async_cookie_t first_running = next_cookie; /* infinity value */
+ async_cookie_t first_pending = next_cookie; /* ditto */
struct async_entry *entry;
+ /*
+ * Both running and pending lists are sorted but not disjoint.
+ * Take the first cookies from both and return the min.
+ */
if (!list_empty(&running->domain)) {
entry = list_first_entry(&running->domain, typeof(*entry), list);
- return entry->cookie;
+ first_running = entry->cookie;
}
- list_for_each_entry(entry, &async_pending, list)
- if (entry->running == running)
- return entry->cookie;
+ list_for_each_entry(entry, &async_pending, list) {
+ if (entry->running == running) {
+ first_pending = entry->cookie;
+ break;
+ }
+ }
- return next_cookie; /* "infinity" value */
+ return min(first_running, first_pending);
}
static async_cookie_t lowest_in_progress(struct async_domain *running)
@@ -118,13 +127,17 @@ static void async_run_entry_fn(struct work_struct *work)
{
struct async_entry *entry =
container_of(work, struct async_entry, work);
+ struct async_entry *pos;
unsigned long flags;
ktime_t uninitialized_var(calltime), delta, rettime;
struct async_domain *running = entry->running;
- /* 1) move self to the running queue */
+ /* 1) move self to the running queue, make sure it stays sorted */
spin_lock_irqsave(&async_lock, flags);
- list_move_tail(&entry->list, &running->domain);
+ list_for_each_entry_reverse(pos, &running->domain, list)
+ if (entry->cookie < pos->cookie)
+ break;
+ list_move_tail(&entry->list, &pos->list);
spin_unlock_irqrestore(&async_lock, flags);
/* 2) run (and print duration) */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 301079d06f2..7b6646a8c06 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -908,6 +908,15 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
}
/*
+ * Initialize event state based on the perf_event_attr::disabled.
+ */
+static inline void perf_event__state_init(struct perf_event *event)
+{
+ event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF :
+ PERF_EVENT_STATE_INACTIVE;
+}
+
+/*
* Called at perf_event creation and when events are attached/detached from a
* group.
*/
@@ -6179,8 +6188,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
event->overflow_handler = overflow_handler;
event->overflow_handler_context = context;
- if (attr->disabled)
- event->state = PERF_EVENT_STATE_OFF;
+ perf_event__state_init(event);
pmu = NULL;
@@ -6609,9 +6617,17 @@ SYSCALL_DEFINE5(perf_event_open,
mutex_lock(&gctx->mutex);
perf_remove_from_context(group_leader);
+
+ /*
+ * Removing from the context ends up with disabled
+ * event. What we want here is event in the initial
+ * startup state, ready to be add into new context.
+ */
+ perf_event__state_init(group_leader);
list_for_each_entry(sibling, &group_leader->sibling_list,
group_entry) {
perf_remove_from_context(sibling);
+ perf_event__state_init(sibling);
put_ctx(gctx);
}
mutex_unlock(&gctx->mutex);
diff --git a/kernel/printk.c b/kernel/printk.c
index 357f714ddd4..267ce780abe 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -87,12 +87,6 @@ static DEFINE_SEMAPHORE(console_sem);
struct console *console_drivers;
EXPORT_SYMBOL_GPL(console_drivers);
-#ifdef CONFIG_LOCKDEP
-static struct lockdep_map console_lock_dep_map = {
- .name = "console_lock"
-};
-#endif
-
/*
* This is used for debugging the mess that is the VT code by
* keeping track if we have the console semaphore held. It's
@@ -1924,7 +1918,6 @@ void console_lock(void)
return;
console_locked = 1;
console_may_schedule = 1;
- mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);
}
EXPORT_SYMBOL(console_lock);
@@ -1946,7 +1939,6 @@ int console_trylock(void)
}
console_locked = 1;
console_may_schedule = 0;
- mutex_acquire(&console_lock_dep_map, 0, 1, _RET_IP_);
return 1;
}
EXPORT_SYMBOL(console_trylock);
@@ -2107,7 +2099,6 @@ skip:
local_irq_restore(flags);
}
console_locked = 0;
- mutex_release(&console_lock_dep_map, 1, _RET_IP_);
/* Release the exclusive_console once it is used */
if (unlikely(exclusive_console))
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 612a5612685..6cbeaae4406 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -117,11 +117,45 @@ void __ptrace_unlink(struct task_struct *child)
* TASK_KILLABLE sleeps.
*/
if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child))
- signal_wake_up(child, task_is_traced(child));
+ ptrace_signal_wake_up(child, true);
spin_unlock(&child->sighand->siglock);
}
+/* Ensure that nothing can wake it up, even SIGKILL */
+static bool ptrace_freeze_traced(struct task_struct *task)
+{
+ bool ret = false;
+
+ /* Lockless, nobody but us can set this flag */
+ if (task->jobctl & JOBCTL_LISTENING)
+ return ret;
+
+ spin_lock_irq(&task->sighand->siglock);
+ if (task_is_traced(task) && !__fatal_signal_pending(task)) {
+ task->state = __TASK_TRACED;
+ ret = true;
+ }
+ spin_unlock_irq(&task->sighand->siglock);
+
+ return ret;
+}
+
+static void ptrace_unfreeze_traced(struct task_struct *task)
+{
+ if (task->state != __TASK_TRACED)
+ return;
+
+ WARN_ON(!task->ptrace || task->parent != current);
+
+ spin_lock_irq(&task->sighand->siglock);
+ if (__fatal_signal_pending(task))
+ wake_up_state(task, __TASK_TRACED);
+ else
+ task->state = TASK_TRACED;
+ spin_unlock_irq(&task->sighand->siglock);
+}
+
/**
* ptrace_check_attach - check whether ptracee is ready for ptrace operation
* @child: ptracee to check for
@@ -151,24 +185,29 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
* be changed by us so it's not changing right after this.
*/
read_lock(&tasklist_lock);
- if ((child->ptrace & PT_PTRACED) && child->parent == current) {
+ if (child->ptrace && child->parent == current) {
+ WARN_ON(child->state == __TASK_TRACED);
/*
* child->sighand can't be NULL, release_task()
* does ptrace_unlink() before __exit_signal().
*/
- spin_lock_irq(&child->sighand->siglock);
- WARN_ON_ONCE(task_is_stopped(child));
- if (ignore_state || (task_is_traced(child) &&
- !(child->jobctl & JOBCTL_LISTENING)))
+ if (ignore_state || ptrace_freeze_traced(child))
ret = 0;
- spin_unlock_irq(&child->sighand->siglock);
}
read_unlock(&tasklist_lock);
- if (!ret && !ignore_state)
- ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH;
+ if (!ret && !ignore_state) {
+ if (!wait_task_inactive(child, __TASK_TRACED)) {
+ /*
+ * This can only happen if may_ptrace_stop() fails and
+ * ptrace_stop() changes ->state back to TASK_RUNNING,
+ * so we should not worry about leaking __TASK_TRACED.
+ */
+ WARN_ON(child->state == __TASK_TRACED);
+ ret = -ESRCH;
+ }
+ }
- /* All systems go.. */
return ret;
}
@@ -317,7 +356,7 @@ static int ptrace_attach(struct task_struct *task, long request,
*/
if (task_is_stopped(task) &&
task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
- signal_wake_up(task, 1);
+ signal_wake_up_state(task, __TASK_STOPPED);
spin_unlock(&task->sighand->siglock);
@@ -737,7 +776,7 @@ int ptrace_request(struct task_struct *child, long request,
* tracee into STOP.
*/
if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP)))
- signal_wake_up(child, child->jobctl & JOBCTL_LISTENING);
+ ptrace_signal_wake_up(child, child->jobctl & JOBCTL_LISTENING);
unlock_task_sighand(child, &flags);
ret = 0;
@@ -763,7 +802,7 @@ int ptrace_request(struct task_struct *child, long request,
* start of this trap and now. Trigger re-trap.
*/
if (child->jobctl & JOBCTL_TRAP_NOTIFY)
- signal_wake_up(child, true);
+ ptrace_signal_wake_up(child, true);
ret = 0;
}
unlock_task_sighand(child, &flags);
@@ -900,6 +939,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
goto out_put_task_struct;
ret = arch_ptrace(child, request, addr, data);
+ if (ret || request != PTRACE_DETACH)
+ ptrace_unfreeze_traced(child);
out_put_task_struct:
put_task_struct(child);
@@ -1039,8 +1080,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
ret = ptrace_check_attach(child, request == PTRACE_KILL ||
request == PTRACE_INTERRUPT);
- if (!ret)
+ if (!ret) {
ret = compat_arch_ptrace(child, request, addr, data);
+ if (ret || request != PTRACE_DETACH)
+ ptrace_unfreeze_traced(child);
+ }
out_put_task_struct:
put_task_struct(child);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index f6e5ec2932b..c1cc7e17ff9 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -40,8 +40,7 @@
#ifdef CONFIG_RCU_NOCB_CPU
static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
-static bool rcu_nocb_poll; /* Offload kthread are to poll. */
-module_param(rcu_nocb_poll, bool, 0444);
+static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */
static char __initdata nocb_buf[NR_CPUS * 5];
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
@@ -2159,6 +2158,13 @@ static int __init rcu_nocb_setup(char *str)
}
__setup("rcu_nocbs=", rcu_nocb_setup);
+static int __init parse_rcu_nocb_poll(char *arg)
+{
+ rcu_nocb_poll = 1;
+ return 0;
+}
+early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
+
/* Is the specified CPU a no-CPUs CPU? */
static bool is_nocb_cpu(int cpu)
{
@@ -2366,10 +2372,11 @@ static int rcu_nocb_kthread(void *arg)
for (;;) {
/* If not polling, wait for next batch of callbacks. */
if (!rcu_nocb_poll)
- wait_event(rdp->nocb_wq, rdp->nocb_head);
+ wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
list = ACCESS_ONCE(rdp->nocb_head);
if (!list) {
schedule_timeout_interruptible(1);
+ flush_signals(current);
continue;
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 257002c13bb..26058d0bebb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1523,7 +1523,8 @@ out:
*/
int wake_up_process(struct task_struct *p)
{
- return try_to_wake_up(p, TASK_ALL, 0);
+ WARN_ON(task_is_stopped_or_traced(p));
+ return try_to_wake_up(p, TASK_NORMAL, 0);
}
EXPORT_SYMBOL(wake_up_process);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 2cd3c1b4e58..7ae4c4c5420 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -222,8 +222,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
cfs_rq->runnable_load_avg);
SEQ_printf(m, " .%-30s: %lld\n", "blocked_load_avg",
cfs_rq->blocked_load_avg);
- SEQ_printf(m, " .%-30s: %ld\n", "tg_load_avg",
- atomic64_read(&cfs_rq->tg->load_avg));
+ SEQ_printf(m, " .%-30s: %lld\n", "tg_load_avg",
+ (unsigned long long)atomic64_read(&cfs_rq->tg->load_avg));
SEQ_printf(m, " .%-30s: %lld\n", "tg_load_contrib",
cfs_rq->tg_load_contrib);
SEQ_printf(m, " .%-30s: %d\n", "tg_runnable_contrib",
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5eea8707234..81fa5364340 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2663,7 +2663,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
hrtimer_cancel(&cfs_b->slack_timer);
}
-static void unthrottle_offline_cfs_rqs(struct rq *rq)
+static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
{
struct cfs_rq *cfs_rq;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 418feb01344..4f02b284735 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -566,7 +566,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
static int do_balance_runtime(struct rt_rq *rt_rq)
{
struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
- struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+ struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
int i, weight, more = 0;
u64 rt_period;
diff --git a/kernel/signal.c b/kernel/signal.c
index 53cd5c4d117..3d09cf6cde7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -680,23 +680,17 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
* No need to set need_resched since signal event passing
* goes through ->blocked
*/
-void signal_wake_up(struct task_struct *t, int resume)
+void signal_wake_up_state(struct task_struct *t, unsigned int state)
{
- unsigned int mask;
-
set_tsk_thread_flag(t, TIF_SIGPENDING);
-
/*
- * For SIGKILL, we want to wake it up in the stopped/traced/killable
+ * TASK_WAKEKILL also means wake it up in the stopped/traced/killable
* case. We don't check t->state here because there is a race with it
* executing another processor and just now entering stopped state.
* By using wake_up_state, we ensure the process will wake up and
* handle its death signal.
*/
- mask = TASK_INTERRUPTIBLE;
- if (resume)
- mask |= TASK_WAKEKILL;
- if (!wake_up_state(t, mask))
+ if (!wake_up_state(t, state | TASK_INTERRUPTIBLE))
kick_process(t);
}
@@ -844,7 +838,7 @@ static void ptrace_trap_notify(struct task_struct *t)
assert_spin_locked(&t->sighand->siglock);
task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY);
- signal_wake_up(t, t->jobctl & JOBCTL_LISTENING);
+ ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING);
}
/*
@@ -1800,6 +1794,10 @@ static inline int may_ptrace_stop(void)
* If SIGKILL was already sent before the caller unlocked
* ->siglock we must see ->core_state != NULL. Otherwise it
* is safe to enter schedule().
+ *
+ * This is almost outdated, a task with the pending SIGKILL can't
+ * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
+ * after SIGKILL was already dequeued.
*/
if (unlikely(current->mm->core_state) &&
unlikely(current->mm == current->parent->mm))
@@ -1925,6 +1923,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
if (gstop_done)
do_notify_parent_cldstop(current, false, why);
+ /* tasklist protects us from ptrace_freeze_traced() */
__set_current_state(TASK_RUNNING);
if (clear_code)
current->exit_code = 0;
diff --git a/kernel/smp.c b/kernel/smp.c
index 29dd40a9f2f..69f38bd98b4 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -33,6 +33,7 @@ struct call_function_data {
struct call_single_data csd;
atomic_t refs;
cpumask_var_t cpumask;
+ cpumask_var_t cpumask_ipi;
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
@@ -56,6 +57,9 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
cpu_to_node(cpu)))
return notifier_from_errno(-ENOMEM);
+ if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
+ cpu_to_node(cpu)))
+ return notifier_from_errno(-ENOMEM);
break;
#ifdef CONFIG_HOTPLUG_CPU
@@ -65,6 +69,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DEAD:
case CPU_DEAD_FROZEN:
free_cpumask_var(cfd->cpumask);
+ free_cpumask_var(cfd->cpumask_ipi);
break;
#endif
};
@@ -526,6 +531,12 @@ void smp_call_function_many(const struct cpumask *mask,
return;
}
+ /*
+ * After we put an entry into the list, data->cpumask
+ * may be cleared again when another CPU sends another IPI for
+ * a SMP function call, so data->cpumask will be zero.
+ */
+ cpumask_copy(data->cpumask_ipi, data->cpumask);
raw_spin_lock_irqsave(&call_function.lock, flags);
/*
* Place entry at the _HEAD_ of the list, so that any cpu still
@@ -549,7 +560,7 @@ void smp_call_function_many(const struct cpumask *mask,
smp_mb();
/* Send a message to all CPUs in the map */
- arch_send_call_function_ipi_mask(data->cpumask);
+ arch_send_call_function_ipi_mask(data->cpumask_ipi);
/* Optionally wait for the CPUs to complete */
if (wait)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3ffe4c5ad3f..41473b4ad7a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3998,7 +3998,7 @@ static int ftrace_module_notify(struct notifier_block *self,
struct notifier_block ftrace_module_nb = {
.notifier_call = ftrace_module_notify,
- .priority = 0,
+ .priority = INT_MAX, /* Run before anything that can use kprobes */
};
extern unsigned long __start_mcount_loc[];