summaryrefslogtreecommitdiffstats
path: root/kernel/sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--kernel/sched.c100
1 files changed, 68 insertions, 32 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 1b59e265273..ce1056e9b02 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -693,6 +693,7 @@ static inline int cpu_of(struct rq *rq)
#define this_rq() (&__get_cpu_var(runqueues))
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
+#define raw_rq() (&__raw_get_cpu_var(runqueues))
inline void update_rq_clock(struct rq *rq)
{
@@ -2637,9 +2638,32 @@ void sched_fork(struct task_struct *p, int clone_flags)
set_task_cpu(p, cpu);
/*
- * Make sure we do not leak PI boosting priority to the child:
+ * Make sure we do not leak PI boosting priority to the child.
*/
p->prio = current->normal_prio;
+
+ /*
+ * Revert to default priority/policy on fork if requested.
+ */
+ if (unlikely(p->sched_reset_on_fork)) {
+ if (p->policy == SCHED_FIFO || p->policy == SCHED_RR)
+ p->policy = SCHED_NORMAL;
+
+ if (p->normal_prio < DEFAULT_PRIO)
+ p->prio = DEFAULT_PRIO;
+
+ if (PRIO_TO_NICE(p->static_prio) < 0) {
+ p->static_prio = NICE_TO_PRIO(0);
+ set_load_weight(p);
+ }
+
+ /*
+ * We don't need the reset flag anymore after the fork. It has
+ * fulfilled its duty:
+ */
+ p->sched_reset_on_fork = 0;
+ }
+
if (!rt_prio(p->prio))
p->sched_class = &fair_sched_class;
@@ -6123,17 +6147,25 @@ static int __sched_setscheduler(struct task_struct *p, int policy,
unsigned long flags;
const struct sched_class *prev_class = p->sched_class;
struct rq *rq;
+ int reset_on_fork;
/* may grab non-irq protected spin_locks */
BUG_ON(in_interrupt());
recheck:
/* double check policy once rq lock held */
- if (policy < 0)
+ if (policy < 0) {
+ reset_on_fork = p->sched_reset_on_fork;
policy = oldpolicy = p->policy;
- else if (policy != SCHED_FIFO && policy != SCHED_RR &&
- policy != SCHED_NORMAL && policy != SCHED_BATCH &&
- policy != SCHED_IDLE)
- return -EINVAL;
+ } else {
+ reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
+ policy &= ~SCHED_RESET_ON_FORK;
+
+ if (policy != SCHED_FIFO && policy != SCHED_RR &&
+ policy != SCHED_NORMAL && policy != SCHED_BATCH &&
+ policy != SCHED_IDLE)
+ return -EINVAL;
+ }
+
/*
* Valid priorities for SCHED_FIFO and SCHED_RR are
* 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL,
@@ -6177,6 +6209,10 @@ recheck:
/* can't change other user's priorities */
if (!check_same_owner(p))
return -EPERM;
+
+ /* Normal users shall not reset the sched_reset_on_fork flag */
+ if (p->sched_reset_on_fork && !reset_on_fork)
+ return -EPERM;
}
if (user) {
@@ -6220,6 +6256,8 @@ recheck:
if (running)
p->sched_class->put_prev_task(rq, p);
+ p->sched_reset_on_fork = reset_on_fork;
+
oldprio = p->prio;
__setscheduler(rq, p, policy, param->sched_priority);
@@ -6336,14 +6374,15 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
if (p) {
retval = security_task_getscheduler(p);
if (!retval)
- retval = p->policy;
+ retval = p->policy
+ | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
}
read_unlock(&tasklist_lock);
return retval;
}
/**
- * sys_sched_getscheduler - get the RT priority of a thread
+ * sys_sched_getparam - get the RT priority of a thread
* @pid: the pid in question.
* @param: structure containing the RT priority.
*/
@@ -6571,19 +6610,9 @@ static inline int should_resched(void)
static void __cond_resched(void)
{
-#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
- __might_sleep(__FILE__, __LINE__);
-#endif
- /*
- * The BKS might be reacquired before we have dropped
- * PREEMPT_ACTIVE, which could trigger a second
- * cond_resched() call.
- */
- do {
- add_preempt_count(PREEMPT_ACTIVE);
- schedule();
- sub_preempt_count(PREEMPT_ACTIVE);
- } while (need_resched());
+ add_preempt_count(PREEMPT_ACTIVE);
+ schedule();
+ sub_preempt_count(PREEMPT_ACTIVE);
}
int __sched _cond_resched(void)
@@ -6597,14 +6626,14 @@ int __sched _cond_resched(void)
EXPORT_SYMBOL(_cond_resched);
/*
- * cond_resched_lock() - if a reschedule is pending, drop the given lock,
+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
* call schedule, and on return reacquire the lock.
*
* This works OK both with and without CONFIG_PREEMPT. We do strange low-level
* operations here to prevent schedule() from being called twice (once via
* spin_unlock(), once by hand).
*/
-int cond_resched_lock(spinlock_t *lock)
+int __cond_resched_lock(spinlock_t *lock)
{
int resched = should_resched();
int ret = 0;
@@ -6620,9 +6649,9 @@ int cond_resched_lock(spinlock_t *lock)
}
return ret;
}
-EXPORT_SYMBOL(cond_resched_lock);
+EXPORT_SYMBOL(__cond_resched_lock);
-int __sched cond_resched_softirq(void)
+int __sched __cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
@@ -6634,7 +6663,7 @@ int __sched cond_resched_softirq(void)
}
return 0;
}
-EXPORT_SYMBOL(cond_resched_softirq);
+EXPORT_SYMBOL(__cond_resched_softirq);
/**
* yield - yield the current processor to other threads.
@@ -6658,7 +6687,7 @@ EXPORT_SYMBOL(yield);
*/
void __sched io_schedule(void)
{
- struct rq *rq = &__raw_get_cpu_var(runqueues);
+ struct rq *rq = raw_rq();
delayacct_blkio_start();
atomic_inc(&rq->nr_iowait);
@@ -6670,7 +6699,7 @@ EXPORT_SYMBOL(io_schedule);
long __sched io_schedule_timeout(long timeout)
{
- struct rq *rq = &__raw_get_cpu_var(runqueues);
+ struct rq *rq = raw_rq();
long ret;
delayacct_blkio_start();
@@ -7625,7 +7654,7 @@ static int __init migration_init(void)
migration_call(&migration_notifier, CPU_ONLINE, cpu);
register_cpu_notifier(&migration_notifier);
- return err;
+ return 0;
}
early_initcall(migration_init);
#endif
@@ -9398,13 +9427,20 @@ void __init sched_init(void)
}
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
-void __might_sleep(char *file, int line)
+static inline int preempt_count_equals(int preempt_offset)
+{
+ int nested = preempt_count() & ~PREEMPT_ACTIVE;
+
+ return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
+}
+
+void __might_sleep(char *file, int line, int preempt_offset)
{
#ifdef in_atomic
static unsigned long prev_jiffy; /* ratelimiting */
- if ((!in_atomic() && !irqs_disabled()) ||
- system_state != SYSTEM_RUNNING || oops_in_progress)
+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
+ system_state != SYSTEM_RUNNING || oops_in_progress)
return;
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
return;