summary | refs | log | tree | commit | diff | stats
path: root/include
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2010-05-18 08:27:54 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-05-18 08:27:54 -0700
commit b8ae30ee26d379db436b0b8c8c3ff1b52f69e5d1 (patch)
tree 506aa0b4bdbf90f61e7e9261c7db90aa1452dcce /include
parent 4d7b4ac22fbec1a03206c6cde353f2fd6942f828 (diff)
parent 9c6f7e43b4e02c161b53e97ba913855246876c61 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (49 commits)
  stop_machine: Move local variable closer to the usage site in cpu_stop_cpu_callback()
  sched, wait: Use wrapper functions
  sched: Remove a stale comment
  ondemand: Make the iowait-is-busy time a sysfs tunable
  ondemand: Solve a big performance issue by counting IOWAIT time as busy
  sched: Intoduce get_cpu_iowait_time_us()
  sched: Eliminate the ts->idle_lastupdate field
  sched: Fold updating of the last_update_time_info into update_ts_time_stats()
  sched: Update the idle statistics in get_cpu_idle_time_us()
  sched: Introduce a function to update the idle statistics
  sched: Add a comment to get_cpu_idle_time_us()
  cpu_stop: add dummy implementation for UP
  sched: Remove rq argument to the tracepoints
  rcu: need barrier() in UP synchronize_sched_expedited()
  sched: correctly place paranioa memory barriers in synchronize_sched_expedited()
  sched: kill paranoia check in synchronize_sched_expedited()
  sched: replace migration_thread with cpu_stop
  stop_machine: reimplement using cpu_stop
  cpu_stop: implement stop_cpu[s]()
  sched: Fix select_idle_sibling() logic in select_task_rq_fair()
  ...
Diffstat (limited to 'include')
-rw-r--r--  include/linux/cpuset.h         16
-rw-r--r--  include/linux/rcutiny.h         2
-rw-r--r--  include/linux/rcutree.h         1
-rw-r--r--  include/linux/sched.h          70
-rw-r--r--  include/linux/stop_machine.h  122
-rw-r--r--  include/linux/tick.h            5
-rw-r--r--  include/linux/wait.h           35
-rw-r--r--  include/trace/events/sched.h   32
 8 files changed, 166 insertions, 117 deletions
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index a5740fc4d04..a73454aec33 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -21,8 +21,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
extern int cpuset_init(void);
extern void cpuset_init_smp(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
-extern void cpuset_cpus_allowed_locked(struct task_struct *p,
- struct cpumask *mask);
+extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
#define cpuset_current_mems_allowed (current->mems_allowed)
void cpuset_init_current_mems_allowed(void);
@@ -69,9 +68,6 @@ struct seq_file;
extern void cpuset_task_status_allowed(struct seq_file *m,
struct task_struct *task);
-extern void cpuset_lock(void);
-extern void cpuset_unlock(void);
-
extern int cpuset_mem_spread_node(void);
static inline int cpuset_do_page_mem_spread(void)
@@ -105,10 +101,11 @@ static inline void cpuset_cpus_allowed(struct task_struct *p,
{
cpumask_copy(mask, cpu_possible_mask);
}
-static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
- struct cpumask *mask)
+
+static inline int cpuset_cpus_allowed_fallback(struct task_struct *p)
{
- cpumask_copy(mask, cpu_possible_mask);
+ cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
+ return cpumask_any(cpu_active_mask);
}
static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
@@ -157,9 +154,6 @@ static inline void cpuset_task_status_allowed(struct seq_file *m,
{
}
-static inline void cpuset_lock(void) {}
-static inline void cpuset_unlock(void) {}
-
static inline int cpuset_mem_spread_node(void)
{
return 0;
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 14e5a76b2c0..e2e893144a8 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -64,8 +64,6 @@ static inline long rcu_batches_completed_bh(void)
return 0;
}
-extern int rcu_expedited_torture_stats(char *page);
-
static inline void rcu_force_quiescent_state(void)
{
}
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 48282055e83..c0ed1c056f2 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -36,7 +36,6 @@ extern void rcu_sched_qs(int cpu);
extern void rcu_bh_qs(int cpu);
extern void rcu_note_context_switch(int cpu);
extern int rcu_needs_cpu(int cpu);
-extern int rcu_expedited_torture_stats(char *page);
#ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 28b71ee133f..b55e988988b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -274,11 +274,17 @@ extern cpumask_var_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern int select_nohz_load_balancer(int cpu);
extern int get_nohz_load_balancer(void);
+extern int nohz_ratelimit(int cpu);
#else
static inline int select_nohz_load_balancer(int cpu)
{
return 0;
}
+
+static inline int nohz_ratelimit(int cpu)
+{
+ return 0;
+}
#endif
/*
@@ -953,6 +959,7 @@ struct sched_domain {
char *name;
#endif
+ unsigned int span_weight;
/*
* Span of all CPUs in this domain.
*
@@ -1025,12 +1032,17 @@ struct sched_domain;
#define WF_SYNC 0x01 /* waker goes to sleep after wakup */
#define WF_FORK 0x02 /* child wakeup after fork */
+#define ENQUEUE_WAKEUP 1
+#define ENQUEUE_WAKING 2
+#define ENQUEUE_HEAD 4
+
+#define DEQUEUE_SLEEP 1
+
struct sched_class {
const struct sched_class *next;
- void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup,
- bool head);
- void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
+ void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
+ void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*yield_task) (struct rq *rq);
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
@@ -1039,7 +1051,8 @@ struct sched_class {
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP
- int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
+ int (*select_task_rq)(struct rq *rq, struct task_struct *p,
+ int sd_flag, int flags);
void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
void (*post_schedule) (struct rq *this_rq);
@@ -1076,36 +1089,8 @@ struct load_weight {
unsigned long weight, inv_weight;
};
-/*
- * CFS stats for a schedulable entity (task, task-group etc)
- *
- * Current field usage histogram:
- *
- * 4 se->block_start
- * 4 se->run_node
- * 4 se->sleep_start
- * 6 se->load.weight
- */
-struct sched_entity {
- struct load_weight load; /* for load-balancing */
- struct rb_node run_node;
- struct list_head group_node;
- unsigned int on_rq;
-
- u64 exec_start;
- u64 sum_exec_runtime;
- u64 vruntime;
- u64 prev_sum_exec_runtime;
-
- u64 last_wakeup;
- u64 avg_overlap;
-
- u64 nr_migrations;
-
- u64 start_runtime;
- u64 avg_wakeup;
-
#ifdef CONFIG_SCHEDSTATS
+struct sched_statistics {
u64 wait_start;
u64 wait_max;
u64 wait_count;
@@ -1137,6 +1122,24 @@ struct sched_entity {
u64 nr_wakeups_affine_attempts;
u64 nr_wakeups_passive;
u64 nr_wakeups_idle;
+};
+#endif
+
+struct sched_entity {
+ struct load_weight load; /* for load-balancing */
+ struct rb_node run_node;
+ struct list_head group_node;
+ unsigned int on_rq;
+
+ u64 exec_start;
+ u64 sum_exec_runtime;
+ u64 vruntime;
+ u64 prev_sum_exec_runtime;
+
+ u64 nr_migrations;
+
+#ifdef CONFIG_SCHEDSTATS
+ struct sched_statistics statistics;
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1839,6 +1842,7 @@ extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
#ifdef CONFIG_HOTPLUG_CPU
+extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
extern void idle_task_exit(void);
#else
static inline void idle_task_exit(void) {}
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index baba3a23a81..6b524a0d02e 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -1,13 +1,101 @@
#ifndef _LINUX_STOP_MACHINE
#define _LINUX_STOP_MACHINE
-/* "Bogolock": stop the entire machine, disable interrupts. This is a
- very heavy lock, which is equivalent to grabbing every spinlock
- (and more). So the "read" side to such a lock is anything which
- disables preeempt. */
+
#include <linux/cpu.h>
#include <linux/cpumask.h>
+#include <linux/list.h>
#include <asm/system.h>
+/*
+ * stop_cpu[s]() is simplistic per-cpu maximum priority cpu
+ * monopolization mechanism. The caller can specify a non-sleeping
+ * function to be executed on a single or multiple cpus preempting all
+ * other processes and monopolizing those cpus until it finishes.
+ *
+ * Resources for this mechanism are preallocated when a cpu is brought
+ * up and requests are guaranteed to be served as long as the target
+ * cpus are online.
+ */
+typedef int (*cpu_stop_fn_t)(void *arg);
+
+#ifdef CONFIG_SMP
+
+struct cpu_stop_work {
+ struct list_head list; /* cpu_stopper->works */
+ cpu_stop_fn_t fn;
+ void *arg;
+ struct cpu_stop_done *done;
+};
+
+int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
+void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
+ struct cpu_stop_work *work_buf);
+int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
+int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
+
+#else /* CONFIG_SMP */
+
+#include <linux/workqueue.h>
+
+struct cpu_stop_work {
+ struct work_struct work;
+ cpu_stop_fn_t fn;
+ void *arg;
+};
+
+static inline int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
+{
+ int ret = -ENOENT;
+ preempt_disable();
+ if (cpu == smp_processor_id())
+ ret = fn(arg);
+ preempt_enable();
+ return ret;
+}
+
+static void stop_one_cpu_nowait_workfn(struct work_struct *work)
+{
+ struct cpu_stop_work *stwork =
+ container_of(work, struct cpu_stop_work, work);
+ preempt_disable();
+ stwork->fn(stwork->arg);
+ preempt_enable();
+}
+
+static inline void stop_one_cpu_nowait(unsigned int cpu,
+ cpu_stop_fn_t fn, void *arg,
+ struct cpu_stop_work *work_buf)
+{
+ if (cpu == smp_processor_id()) {
+ INIT_WORK(&work_buf->work, stop_one_cpu_nowait_workfn);
+ work_buf->fn = fn;
+ work_buf->arg = arg;
+ schedule_work(&work_buf->work);
+ }
+}
+
+static inline int stop_cpus(const struct cpumask *cpumask,
+ cpu_stop_fn_t fn, void *arg)
+{
+ if (cpumask_test_cpu(raw_smp_processor_id(), cpumask))
+ return stop_one_cpu(raw_smp_processor_id(), fn, arg);
+ return -ENOENT;
+}
+
+static inline int try_stop_cpus(const struct cpumask *cpumask,
+ cpu_stop_fn_t fn, void *arg)
+{
+ return stop_cpus(cpumask, fn, arg);
+}
+
+#endif /* CONFIG_SMP */
+
+/*
+ * stop_machine "Bogolock": stop the entire machine, disable
+ * interrupts. This is a very heavy lock, which is equivalent to
+ * grabbing every spinlock (and more). So the "read" side to such a
+ * lock is anything which disables preeempt.
+ */
#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
/**
@@ -36,24 +124,7 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
*/
int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
-/**
- * stop_machine_create: create all stop_machine threads
- *
- * Description: This causes all stop_machine threads to be created before
- * stop_machine actually gets called. This can be used by subsystems that
- * need a non failing stop_machine infrastructure.
- */
-int stop_machine_create(void);
-
-/**
- * stop_machine_destroy: destroy all stop_machine threads
- *
- * Description: This causes all stop_machine threads which were created with
- * stop_machine_create to be destroyed again.
- */
-void stop_machine_destroy(void);
-
-#else
+#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */
static inline int stop_machine(int (*fn)(void *), void *data,
const struct cpumask *cpus)
@@ -65,8 +136,5 @@ static inline int stop_machine(int (*fn)(void *), void *data,
return ret;
}
-static inline int stop_machine_create(void) { return 0; }
-static inline void stop_machine_destroy(void) { }
-
-#endif /* CONFIG_SMP */
-#endif /* _LINUX_STOP_MACHINE */
+#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */
+#endif /* _LINUX_STOP_MACHINE */
diff --git a/include/linux/tick.h b/include/linux/tick.h
index d2ae79e21be..b232ccc0ee2 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -42,6 +42,7 @@ enum tick_nohz_mode {
* @idle_waketime: Time when the idle was interrupted
* @idle_exittime: Time when the idle state was left
* @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
+ * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding
* @sleep_length: Duration of the current idle sleep
* @do_timer_lst: CPU was the last one doing do_timer before going idle
*/
@@ -60,7 +61,7 @@ struct tick_sched {
ktime_t idle_waketime;
ktime_t idle_exittime;
ktime_t idle_sleeptime;
- ktime_t idle_lastupdate;
+ ktime_t iowait_sleeptime;
ktime_t sleep_length;
unsigned long last_jiffies;
unsigned long next_jiffies;
@@ -124,6 +125,7 @@ extern void tick_nohz_stop_sched_tick(int inidle);
extern void tick_nohz_restart_sched_tick(void);
extern ktime_t tick_nohz_get_sleep_length(void);
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
+extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
# else
static inline void tick_nohz_stop_sched_tick(int inidle) { }
static inline void tick_nohz_restart_sched_tick(void) { }
@@ -134,6 +136,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void)
return len;
}
static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
+static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
# endif /* !NO_HZ */
#endif
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a48e16b77d5..76d96d035ea 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -127,12 +127,26 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
/*
* Used for wake-one threads:
*/
+static inline void __add_wait_queue_exclusive(wait_queue_head_t *q,
+ wait_queue_t *wait)
+{
+ wait->flags |= WQ_FLAG_EXCLUSIVE;
+ __add_wait_queue(q, wait);
+}
+
static inline void __add_wait_queue_tail(wait_queue_head_t *head,
- wait_queue_t *new)
+ wait_queue_t *new)
{
list_add_tail(&new->task_list, &head->task_list);
}
+static inline void __add_wait_queue_tail_exclusive(wait_queue_head_t *q,
+ wait_queue_t *wait)
+{
+ wait->flags |= WQ_FLAG_EXCLUSIVE;
+ __add_wait_queue_tail(q, wait);
+}
+
static inline void __remove_wait_queue(wait_queue_head_t *head,
wait_queue_t *old)
{
@@ -404,25 +418,6 @@ do { \
})
/*
- * Must be called with the spinlock in the wait_queue_head_t held.
- */
-static inline void add_wait_queue_exclusive_locked(wait_queue_head_t *q,
- wait_queue_t * wait)
-{
- wait->flags |= WQ_FLAG_EXCLUSIVE;
- __add_wait_queue_tail(q, wait);
-}
-
-/*
- * Must be called with the spinlock in the wait_queue_head_t held.
- */
-static inline void remove_wait_queue_locked(wait_queue_head_t *q,
- wait_queue_t * wait)
-{
- __remove_wait_queue(q, wait);
-}
-
-/*
* These are the old interfaces to sleep waiting for an event.
* They are racy. DO NOT use them, use the wait_event* interfaces above.
* We plan to remove these interfaces.
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index cfceb0b73e2..4f733ecea46 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -51,15 +51,12 @@ TRACE_EVENT(sched_kthread_stop_ret,
/*
* Tracepoint for waiting on task to unschedule:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- * but used by the latency tracer plugin. )
*/
TRACE_EVENT(sched_wait_task,
- TP_PROTO(struct rq *rq, struct task_struct *p),
+ TP_PROTO(struct task_struct *p),
- TP_ARGS(rq, p),
+ TP_ARGS(p),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
@@ -79,15 +76,12 @@ TRACE_EVENT(sched_wait_task,
/*
* Tracepoint for waking up a task:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- * but used by the latency tracer plugin. )
*/
DECLARE_EVENT_CLASS(sched_wakeup_template,
- TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+ TP_PROTO(struct task_struct *p, int success),
- TP_ARGS(rq, p, success),
+ TP_ARGS(p, success),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
@@ -111,31 +105,25 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
);
DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
- TP_PROTO(struct rq *rq, struct task_struct *p, int success),
- TP_ARGS(rq, p, success));
+ TP_PROTO(struct task_struct *p, int success),
+ TP_ARGS(p, success));
/*
* Tracepoint for waking up a new task:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- * but used by the latency tracer plugin. )
*/
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
- TP_PROTO(struct rq *rq, struct task_struct *p, int success),
- TP_ARGS(rq, p, success));
+ TP_PROTO(struct task_struct *p, int success),
+ TP_ARGS(p, success));
/*
* Tracepoint for task switches, performed by the scheduler:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- * but used by the latency tracer plugin. )
*/
TRACE_EVENT(sched_switch,
- TP_PROTO(struct rq *rq, struct task_struct *prev,
+ TP_PROTO(struct task_struct *prev,
struct task_struct *next),
- TP_ARGS(rq, prev, next),
+ TP_ARGS(prev, next),
TP_STRUCT__entry(
__array( char, prev_comm, TASK_COMM_LEN )