summary refs log tree commit diff stats
path: root/kernel/sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- kernel/sched.c 158
1 files changed, 107 insertions, 51 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 9aaf567c5da..b50b0f0c9aa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -75,6 +75,9 @@
#include <asm/tlb.h>
#include <asm/irq_regs.h>
#include <asm/mutex.h>
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#endif
#include "sched_cpupri.h"
#include "workqueue_sched.h"
@@ -528,6 +531,12 @@ struct rq {
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time;
#endif
+#ifdef CONFIG_PARAVIRT
+ u64 prev_steal_time;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+ u64 prev_steal_time_rq;
+#endif
/* calc_load related fields */
unsigned long calc_load_update;
@@ -581,7 +590,6 @@ static inline int cpu_of(struct rq *rq)
#define rcu_dereference_check_sched_domain(p) \
rcu_dereference_check((p), \
- rcu_read_lock_held() || \
lockdep_is_held(&sched_domains_mutex))
/*
@@ -1921,10 +1929,28 @@ void account_system_vtime(struct task_struct *curr)
}
EXPORT_SYMBOL_GPL(account_system_vtime);
-static void update_rq_clock_task(struct rq *rq, s64 delta)
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+
+#ifdef CONFIG_PARAVIRT
+static inline u64 steal_ticks(u64 steal)
{
- s64 irq_delta;
+ if (unlikely(steal > NSEC_PER_SEC))
+ return div_u64(steal, TICK_NSEC);
+ return __iter_div_u64_rem(steal, TICK_NSEC, &steal);
+}
+#endif
+
+static void update_rq_clock_task(struct rq *rq, s64 delta)
+{
+/*
+ * In theory, the compiler should just see 0 here, and optimize out the call
+ * to sched_rt_avg_update. But I don't trust it...
+ */
+#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+ s64 steal = 0, irq_delta = 0;
+#endif
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
/*
@@ -1947,12 +1973,35 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
rq->prev_irq_time += irq_delta;
delta -= irq_delta;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+ if (static_branch((&paravirt_steal_rq_enabled))) {
+ u64 st;
+
+ steal = paravirt_steal_clock(cpu_of(rq));
+ steal -= rq->prev_steal_time_rq;
+
+ if (unlikely(steal > delta))
+ steal = delta;
+
+ st = steal_ticks(steal);
+ steal = st * TICK_NSEC;
+
+ rq->prev_steal_time_rq += steal;
+
+ delta -= steal;
+ }
+#endif
+
rq->clock_task += delta;
- if (irq_delta && sched_feat(NONIRQ_POWER))
- sched_rt_avg_update(rq, irq_delta);
+#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+ if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
+ sched_rt_avg_update(rq, irq_delta + steal);
+#endif
}
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
static int irqtime_account_hi_update(void)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
@@ -1987,12 +2036,7 @@ static int irqtime_account_si_update(void)
#define sched_clock_irqtime (0)
-static void update_rq_clock_task(struct rq *rq, s64 delta)
-{
- rq->clock_task += delta;
-}
-
-#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+#endif
#include "sched_idletask.c"
#include "sched_fair.c"
@@ -3021,7 +3065,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
local_irq_disable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
- perf_event_task_sched_in(current);
+ perf_event_task_sched_in(prev, current);
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
local_irq_enable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
@@ -3681,30 +3725,6 @@ unsigned long long task_sched_runtime(struct task_struct *p)
}
/*
- * Return sum_exec_runtime for the thread group.
- * In case the task is currently running, return the sum plus current's
- * pending runtime that have not been accounted yet.
- *
- * Note that the thread group might have other running tasks as well,
- * so the return value not includes other pending runtime that other
- * running tasks might have.
- */
-unsigned long long thread_group_sched_runtime(struct task_struct *p)
-{
- struct task_cputime totals;
- unsigned long flags;
- struct rq *rq;
- u64 ns;
-
- rq = task_rq_lock(p, &flags);
- thread_group_cputime(p, &totals);
- ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
- task_rq_unlock(rq, p, &flags);
-
- return ns;
-}
-
-/*
* Account user cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in user space since the last update
@@ -3845,6 +3865,25 @@ void account_idle_time(cputime_t cputime)
cpustat->idle = cputime64_add(cpustat->idle, cputime64);
}
+static __always_inline bool steal_account_process_tick(void)
+{
+#ifdef CONFIG_PARAVIRT
+ if (static_branch(&paravirt_steal_enabled)) {
+ u64 steal, st = 0;
+
+ steal = paravirt_steal_clock(smp_processor_id());
+ steal -= this_rq()->prev_steal_time;
+
+ st = steal_ticks(steal);
+ this_rq()->prev_steal_time += st * TICK_NSEC;
+
+ account_steal_time(st);
+ return st;
+ }
+#endif
+ return false;
+}
+
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -3876,6 +3915,9 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ if (steal_account_process_tick())
+ return;
+
if (irqtime_account_hi_update()) {
cpustat->irq = cputime64_add(cpustat->irq, tmp);
} else if (irqtime_account_si_update()) {
@@ -3929,6 +3971,9 @@ void account_process_tick(struct task_struct *p, int user_tick)
return;
}
+ if (steal_account_process_tick())
+ return;
+
if (user_tick)
account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
@@ -4210,9 +4255,9 @@ pick_next_task(struct rq *rq)
}
/*
- * schedule() is the main scheduler function.
+ * __schedule() is the main scheduler function.
*/
-asmlinkage void __sched schedule(void)
+static void __sched __schedule(void)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
@@ -4253,16 +4298,6 @@ need_resched:
if (to_wakeup)
try_to_wake_up_local(to_wakeup);
}
-
- /*
- * If we are going to sleep and we have plugged IO
- * queued, make sure to submit it to avoid deadlocks.
- */
- if (blk_needs_flush_plug(prev)) {
- raw_spin_unlock(&rq->lock);
- blk_schedule_flush_plug(prev);
- raw_spin_lock(&rq->lock);
- }
}
switch_count = &prev->nvcsw;
}
@@ -4300,6 +4335,26 @@ need_resched:
if (need_resched())
goto need_resched;
}
+
+static inline void sched_submit_work(struct task_struct *tsk)
+{
+ if (!tsk->state)
+ return;
+ /*
+ * If we are going to sleep and we have plugged IO queued,
+ * make sure to submit it to avoid deadlocks.
+ */
+ if (blk_needs_flush_plug(tsk))
+ blk_schedule_flush_plug(tsk);
+}
+
+asmlinkage void __sched schedule(void)
+{
+ struct task_struct *tsk = current;
+
+ sched_submit_work(tsk);
+ __schedule();
+}
EXPORT_SYMBOL(schedule);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
@@ -4366,7 +4421,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
do {
add_preempt_count_notrace(PREEMPT_ACTIVE);
- schedule();
+ __schedule();
sub_preempt_count_notrace(PREEMPT_ACTIVE);
/*
@@ -4394,7 +4449,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
do {
add_preempt_count(PREEMPT_ACTIVE);
local_irq_enable();
- schedule();
+ __schedule();
local_irq_disable();
sub_preempt_count(PREEMPT_ACTIVE);
@@ -5519,7 +5574,7 @@ static inline int should_resched(void)
static void __cond_resched(void)
{
add_preempt_count(PREEMPT_ACTIVE);
- schedule();
+ __schedule();
sub_preempt_count(PREEMPT_ACTIVE);
}
@@ -7374,6 +7429,7 @@ static void __sdt_free(const struct cpumask *cpu_map)
struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
if (sd && (sd->flags & SD_OVERLAP))
free_sched_groups(sd->groups, 0);
+ kfree(*per_cpu_ptr(sdd->sd, j));
kfree(*per_cpu_ptr(sdd->sg, j));
kfree(*per_cpu_ptr(sdd->sgp, j));
}