sched/deadline: Add SCHED_DEADLINE structures & implementation

Introduces the data structures, constants and symbols needed for SCHED_DEADLINE implementation. Core data structure of SCHED_DEADLINE are defined, along with their initializers. Hooks for checking if a task belong to the new policy are also added where they are needed. Adds a scheduling class, in sched/dl.c and a new policy called SCHED_DEADLINE. It is an implementation of the Earliest Deadline First (EDF) scheduling algorithm, augmented with a mechanism (called Constant Bandwidth Server, CBS) that makes it possible to isolate the behaviour of tasks between each other. The typical -deadline task will be made up of a computation phase (instance) which is activated on a periodic or sporadic fashion. The expected (maximum) duration of such computation is called the task's runtime; the time interval by which each instance need to be completed is called the task's relative deadline. The task's absolute deadline is dynamically calculated as the time instant a task (better, an instance) activates plus the relative deadline. The EDF algorithms selects the task with the smallest absolute deadline as the one to be executed first, while the CBS ensures each task to run for at most its runtime every (relative) deadline length time interval, avoiding any interference between different tasks (bandwidth isolation). Thanks to this feature, also tasks that do not strictly comply with the computational model sketched above can effectively use the new policy. To summarize, this patch: - introduces the data structures, constants and symbols needed; - implements the core logic of the scheduling algorithm in the new scheduling class file; - provides all the glue code between the new scheduling class and the core scheduler and refines the interactions between sched/dl and the other existing scheduling classes. Signed-off-by: Dario Faggioli <raistlin@linux.it> Signed-off-by: Michael Trimarchi <michael@amarulasolutions.com> Signed-off-by: Fabio Checconi <fchecconi@gmail.com> Signed-off-by: Juri Lelli <juri.lelli@gmail.com> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1383831828-15501-4-git-send-email-juri.lelli@gmail.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Dario Faggioli <raistlin@linux.it> 2013-11-28 11:14:43 +0100
committer: Ingo Molnar <mingo@kernel.org> 2014-01-13 13:41:06 +0100
commit: aab03e05e8f7e26f51dee792beddcb5cca9215a5 (patch)
tree: bae7f6033c849e7ca77a98783c732caea412ae75 /kernel/sched/core.c
parent: d50dde5a10f305253cbc3855307f608f8a3c5f73 (diff)
1 files changed, 94 insertions, 15 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8174f889076..203aecdcfcc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -899,7 +899,9 @@ static inline int normal_prio(struct task_struct *p)
 {
 	int prio;
 
-	if (task_has_rt_policy(p))
+	if (task_has_dl_policy(p))
+		prio = MAX_DL_PRIO-1;
+	else if (task_has_rt_policy(p))
 		prio = MAX_RT_PRIO-1 - p->rt_priority;
 	else
 		prio = __normal_prio(p);
@@ -1717,6 +1719,12 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 
+	RB_CLEAR_NODE(&p->dl.rb_node);
+	hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	p->dl.dl_runtime = p->dl.runtime = 0;
+	p->dl.dl_deadline = p->dl.deadline = 0;
+	p->dl.flags = 0;
+
 	INIT_LIST_HEAD(&p->rt.run_list);
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -1768,7 +1776,7 @@ void set_numabalancing_state(bool enabled)
 /*
  * fork()/clone()-time setup:
  */
-void sched_fork(unsigned long clone_flags, struct task_struct *p)
+int sched_fork(unsigned long clone_flags, struct task_struct *p)
 {
 	unsigned long flags;
 	int cpu = get_cpu();
@@ -1790,7 +1798,7 @@ void sched_fork(unsigned long clone_flags, struct task_struct *p)
 	 * Revert to default priority/policy on fork if requested.
 	 */
 	if (unlikely(p->sched_reset_on_fork)) {
-		if (task_has_rt_policy(p)) {
+		if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
 			p->policy = SCHED_NORMAL;
 			p->static_prio = NICE_TO_PRIO(0);
 			p->rt_priority = 0;
@@ -1807,8 +1815,14 @@ void sched_fork(unsigned long clone_flags, struct task_struct *p)
 		p->sched_reset_on_fork = 0;
 	}
 
-	if (!rt_prio(p->prio))
+	if (dl_prio(p->prio)) {
+		put_cpu();
+		return -EAGAIN;
+	} else if (rt_prio(p->prio)) {
+		p->sched_class = &rt_sched_class;
+	} else {
 		p->sched_class = &fair_sched_class;
+	}
 
 	if (p->sched_class->task_fork)
 		p->sched_class->task_fork(p);
@@ -1837,6 +1851,7 @@ void sched_fork(unsigned long clone_flags, struct task_struct *p)
 #endif
 
 	put_cpu();
+	return 0;
 }
 
 /*
@@ -2768,7 +2783,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	struct rq *rq;
 	const struct sched_class *prev_class;
 
-	BUG_ON(prio < 0 || prio > MAX_PRIO);
+	BUG_ON(prio > MAX_PRIO);
 
 	rq = __task_rq_lock(p);
 
@@ -2800,7 +2815,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	if (running)
 		p->sched_class->put_prev_task(rq, p);
 
-	if (rt_prio(prio))
+	if (dl_prio(prio))
+		p->sched_class = &dl_sched_class;
+	else if (rt_prio(prio))
 		p->sched_class = &rt_sched_class;
 	else
 		p->sched_class = &fair_sched_class;
@@ -2835,9 +2852,9 @@ void set_user_nice(struct task_struct *p, long nice)
 	 * The RT priorities are set via sched_setscheduler(), but we still
 	 * allow the 'normal' nice value to be set - but as expected
 	 * it wont have any effect on scheduling until the task is
-	 * SCHED_FIFO/SCHED_RR:
+	 * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR:
 	 */
-	if (task_has_rt_policy(p)) {
+	if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
 		p->static_prio = NICE_TO_PRIO(nice);
 		goto out_unlock;
 	}
@@ -2992,6 +3009,27 @@ static struct task_struct *find_process_by_pid(pid_t pid)
 	return pid ? find_task_by_vpid(pid) : current;
 }
 
+/*
+ * This function initializes the sched_dl_entity of a newly becoming
+ * SCHED_DEADLINE task.
+ *
+ * Only the static values are considered here, the actual runtime and the
+ * absolute deadline will be properly calculated when the task is enqueued
+ * for the first time with its new policy.
+ */
+static void
+__setparam_dl(struct task_struct *p, const struct sched_attr *attr)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+
+	init_dl_task_timer(dl_se);
+	dl_se->dl_runtime = attr->sched_runtime;
+	dl_se->dl_deadline = attr->sched_deadline;
+	dl_se->flags = attr->sched_flags;
+	dl_se->dl_throttled = 0;
+	dl_se->dl_new = 1;
+}
+
 /* Actually do priority change: must hold pi & rq lock. */
 static void __setscheduler(struct rq *rq, struct task_struct *p,
 			   const struct sched_attr *attr)
@@ -3000,7 +3038,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
 
 	p->policy = policy;
 
-	if (rt_policy(policy))
+	if (dl_policy(policy))
+		__setparam_dl(p, attr);
+	else if (rt_policy(policy))
 		p->rt_priority = attr->sched_priority;
 	else
 		p->static_prio = NICE_TO_PRIO(attr->sched_nice);
@@ -3008,13 +3048,39 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
 	p->normal_prio = normal_prio(p);
 	p->prio = rt_mutex_getprio(p);
 
-	if (rt_prio(p->prio))
+	if (dl_prio(p->prio))
+		p->sched_class = &dl_sched_class;
+	else if (rt_prio(p->prio))
 		p->sched_class = &rt_sched_class;
 	else
 		p->sched_class = &fair_sched_class;
 
 	set_load_weight(p);
 }
+
+static void
+__getparam_dl(struct task_struct *p, struct sched_attr *attr)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+
+	attr->sched_priority = p->rt_priority;
+	attr->sched_runtime = dl_se->dl_runtime;
+	attr->sched_deadline = dl_se->dl_deadline;
+	attr->sched_flags = dl_se->flags;
+}
+
+/*
+ * This function validates the new parameters of a -deadline task.
+ * We ask for the deadline not being zero, and greater or equal
+ * than the runtime.
+ */
+static bool
+__checkparam_dl(const struct sched_attr *attr)
+{
+	return attr && attr->sched_deadline != 0 &&
+	       (s64)(attr->sched_deadline - attr->sched_runtime) >= 0;
+}
+
 /*
  * check the target process has a UID that matches the current process's
  */
@@ -3053,7 +3119,8 @@ recheck:
 		reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
 		policy &= ~SCHED_RESET_ON_FORK;
 
-		if (policy != SCHED_FIFO && policy != SCHED_RR &&
+		if (policy != SCHED_DEADLINE &&
+				policy != SCHED_FIFO && policy != SCHED_RR &&
 				policy != SCHED_NORMAL && policy != SCHED_BATCH &&
 				policy != SCHED_IDLE)
 			return -EINVAL;
@@ -3068,7 +3135,8 @@ recheck:
 	    (p->mm && attr->sched_priority > MAX_USER_RT_PRIO-1) ||
 	    (!p->mm && attr->sched_priority > MAX_RT_PRIO-1))
 		return -EINVAL;
-	if (rt_policy(policy) != (attr->sched_priority != 0))
+	if ((dl_policy(policy) && !__checkparam_dl(attr)) ||
+	    (rt_policy(policy) != (attr->sched_priority != 0)))
 		return -EINVAL;
 
 	/*
@@ -3143,6 +3211,8 @@ recheck:
 			goto change;
 		if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
 			goto change;
+		if (dl_policy(policy))
+			goto change;
 
 		task_rq_unlock(rq, p, &flags);
 		return 0;
@@ -3453,6 +3523,10 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 	if (retval)
 		goto out_unlock;
 
+	if (task_has_dl_policy(p)) {
+		retval = -EINVAL;
+		goto out_unlock;
+	}
 	lp.sched_priority = p->rt_priority;
 	rcu_read_unlock();
 
@@ -3510,7 +3584,7 @@ err_size:
 }
 
 /**
- * sys_sched_getattr - same as above, but with extended "sched_param"
+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr
  * @pid: the pid in question.
  * @attr: structure containing the extended parameters.
  * @size: sizeof(attr) for fwd/bwd comp.
@@ -3539,7 +3613,9 @@ SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
 		goto out_unlock;
 
 	attr.sched_policy = p->policy;
-	if (task_has_rt_policy(p))
+	if (task_has_dl_policy(p))
+		__getparam_dl(p, &attr);
+	else if (task_has_rt_policy(p))
 		attr.sched_priority = p->rt_priority;
 	else
 		attr.sched_nice = TASK_NICE(p);
@@ -3965,6 +4041,7 @@ SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
 	case SCHED_RR:
 		ret = MAX_USER_RT_PRIO-1;
 		break;
+	case SCHED_DEADLINE:
 	case SCHED_NORMAL:
 	case SCHED_BATCH:
 	case SCHED_IDLE:
@@ -3991,6 +4068,7 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
 	case SCHED_RR:
 		ret = 1;
 		break;
+	case SCHED_DEADLINE:
 	case SCHED_NORMAL:
 	case SCHED_BATCH:
 	case SCHED_IDLE:
@@ -6472,6 +6550,7 @@ void __init sched_init(void)
 		rq->calc_load_update = jiffies + LOAD_FREQ;
 		init_cfs_rq(&rq->cfs);
 		init_rt_rq(&rq->rt, rq);
+		init_dl_rq(&rq->dl, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 		root_task_group.shares = ROOT_TASK_GROUP_LOAD;
 		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
@@ -6659,7 +6738,7 @@ void normalize_rt_tasks(void)
 		p->se.statistics.block_start	= 0;
 #endif
 
-		if (!rt_task(p)) {
+		if (!dl_task(p) && !rt_task(p)) {
 			/*
 			 * Renice negative nice level userspace
 			 * tasks back to 0:
author	Dario Faggioli <raistlin@linux.it>	2013-11-28 11:14:43 +0100
committer	Ingo Molnar <mingo@kernel.org>	2014-01-13 13:41:06 +0100
commit	aab03e05e8f7e26f51dee792beddcb5cca9215a5 (patch)
tree	bae7f6033c849e7ca77a98783c732caea412ae75 /kernel/sched/core.c
parent	d50dde5a10f305253cbc3855307f608f8a3c5f73 (diff)