Diffstat (limited to 'include/linux/sched.h')
 -rw-r--r--  include/linux/sched.h | 198 +++++++++++++++++++++++++++------
 1 file changed, 169 insertions(+), 29 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d722490da03..045b0d22784 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,6 +22,7 @@ struct sched_param {
#include <linux/errno.h>
#include <linux/nodemask.h>
#include <linux/mm_types.h>
+#include <linux/preempt.h>
#include <asm/page.h>
#include <asm/ptrace.h>
@@ -107,14 +108,6 @@ extern unsigned long this_cpu_load(void);
extern void calc_global_load(unsigned long ticks);
extern void update_cpu_load_nohz(void);
-/* Notifier for when a task gets migrated to a new CPU */
-struct task_migration_notifier {
- struct task_struct *task;
- int from_cpu;
- int to_cpu;
-};
-extern void register_task_migration_notifier(struct notifier_block *n);
-
extern unsigned long get_parent_ip(unsigned long addr);
extern void dump_cpu_task(int cpu);
@@ -435,6 +428,14 @@ struct task_cputime {
.sum_exec_runtime = 0, \
}
+#define PREEMPT_ENABLED (PREEMPT_NEED_RESCHED)
+
+#ifdef CONFIG_PREEMPT_COUNT
+#define PREEMPT_DISABLED (1 + PREEMPT_ENABLED)
+#else
+#define PREEMPT_DISABLED PREEMPT_ENABLED
+#endif
+
/*
* Disable preemption until the scheduler is running.
* Reset by start_kernel()->sched_init()->init_idle().
@@ -442,7 +443,7 @@ struct task_cputime {
* We include PREEMPT_ACTIVE to avoid cond_resched() from working
* before the scheduler is active -- see should_resched().
*/
-#define INIT_PREEMPT_COUNT (1 + PREEMPT_ACTIVE)
+#define INIT_PREEMPT_COUNT (PREEMPT_DISABLED + PREEMPT_ACTIVE)
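[Editor's note] For a concrete feel of what these macros produce, the following user-space sketch evaluates the new INIT_PREEMPT_COUNT for both configurations. The PREEMPT_NEED_RESCHED and PREEMPT_ACTIVE values below are stand-ins chosen for illustration, not the ones defined by the architecture headers.

/* Sketch only: stand-in bit values, not the kernel's. */
#include <stdio.h>

#define PREEMPT_NEED_RESCHED	0x80000000u	/* stand-in value */
#define PREEMPT_ACTIVE		0x10000000u	/* stand-in value */
#define PREEMPT_ENABLED		(PREEMPT_NEED_RESCHED)

int main(void)
{
	/* With CONFIG_PREEMPT_COUNT the boot value carries one disable
	 * level; without it, only PREEMPT_ACTIVE keeps cond_resched()
	 * inert until init_idle() resets the count. */
	unsigned int with_count    = (1 + PREEMPT_ENABLED) + PREEMPT_ACTIVE;
	unsigned int without_count = PREEMPT_ENABLED + PREEMPT_ACTIVE;

	printf("CONFIG_PREEMPT_COUNT=y : 0x%08x\n", with_count);
	printf("CONFIG_PREEMPT_COUNT=n : 0x%08x\n", without_count);
	return 0;
}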
/**
* struct thread_group_cputimer - thread group interval timer counts
@@ -776,6 +777,7 @@ enum cpu_idle_type {
#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
+#define SD_NUMA 0x4000 /* cross-node balancing */
extern int __weak arch_sd_sibiling_asym_packing(void);
@@ -819,6 +821,10 @@ struct sched_domain {
u64 last_update;
+ /* idle_balance() stats */
+ u64 max_newidle_lb_cost;
+ unsigned long next_decay_max_lb_cost;
+
#ifdef CONFIG_SCHEDSTATS
/* load_balance() stats */
unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -1034,6 +1040,11 @@ struct task_struct {
#ifdef CONFIG_SMP
struct llist_node wake_entry;
int on_cpu;
+ struct task_struct *last_wakee;
+ unsigned long wakee_flips;
+ unsigned long wakee_flip_decay_ts;
+
+ int wake_cpu;
#endif
int on_rq;
@@ -1329,10 +1340,41 @@ struct task_struct {
#endif
#ifdef CONFIG_NUMA_BALANCING
int numa_scan_seq;
- int numa_migrate_seq;
unsigned int numa_scan_period;
+ unsigned int numa_scan_period_max;
+ int numa_preferred_nid;
+ int numa_migrate_deferred;
+ unsigned long numa_migrate_retry;
u64 node_stamp; /* migration stamp */
struct callback_head numa_work;
+
+ struct list_head numa_entry;
+ struct numa_group *numa_group;
+
+ /*
+ * Exponential decaying average of faults on a per-node basis.
+ * Scheduling placement decisions are made based on these counts.
+ * The values remain static for the duration of a PTE scan.
+ */
+ unsigned long *numa_faults;
+ unsigned long total_numa_faults;
+
+ /*
+ * numa_faults_buffer records faults per node during the current
+ * scan window. When the scan completes, the counts in numa_faults
+ * decay and these values are copied.
+ */
+ unsigned long *numa_faults_buffer;
+
+ /*
+ * numa_faults_locality tracks whether faults recorded during the last
+ * scan window were remote/local. The task scan period is adapted
+ * based on the locality of the faults with different weights
+ * depending on whether they were shared or private faults.
+ */
+ unsigned long numa_faults_locality[2];
+
+ unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */
struct rcu_head rcu;
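[Editor's note] The comments above describe a decay-and-fold of numa_faults_buffer into numa_faults at the end of each scan window. A minimal sketch of that bookkeeping (assuming a simple halving decay and an explicit nr_nodes argument; this is not the kernel/sched/fair.c implementation) could look like:

/* Sketch only: fold the per-scan buffer into the decayed long-term
 * average, as described in the comments above. */
static void example_fold_numa_faults(struct task_struct *p, int nr_nodes)
{
	int nid;

	p->total_numa_faults = 0;
	for (nid = 0; nid < nr_nodes; nid++) {
		/* Decay the running average, then add the fresh counts. */
		p->numa_faults[nid] = p->numa_faults[nid] / 2 +
				      p->numa_faults_buffer[nid];
		p->numa_faults_buffer[nid] = 0;
		p->total_numa_faults += p->numa_faults[nid];
	}
}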
@@ -1398,6 +1440,12 @@ struct task_struct {
unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
} memcg_batch;
unsigned int memcg_kmem_skip_account;
+ struct memcg_oom_info {
+ struct mem_cgroup *memcg;
+ gfp_t gfp_mask;
+ int order;
+ unsigned int may_oom:1;
+ } memcg_oom;
#endif
#ifdef CONFIG_UPROBES
struct uprobe_task *utask;
@@ -1411,16 +1459,33 @@ struct task_struct {
/* Future-safe accessor for struct task_struct's cpus_allowed. */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
+#define TNF_MIGRATED 0x01
+#define TNF_NO_GROUP 0x02
+#define TNF_SHARED 0x04
+#define TNF_FAULT_LOCAL 0x08
+
#ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, int flags);
+extern pid_t task_numa_group_id(struct task_struct *p);
extern void set_numabalancing_state(bool enabled);
+extern void task_numa_free(struct task_struct *p);
+
+extern unsigned int sysctl_numa_balancing_migrate_deferred;
#else
-static inline void task_numa_fault(int node, int pages, bool migrated)
+static inline void task_numa_fault(int last_node, int node, int pages,
+ int flags)
{
}
+static inline pid_t task_numa_group_id(struct task_struct *p)
+{
+ return 0;
+}
static inline void set_numabalancing_state(bool enabled)
{
}
+static inline void task_numa_free(struct task_struct *p)
+{
+}
#endif
static inline struct pid *task_pid(struct task_struct *task)
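[Editor's note] To show how the widened task_numa_fault() signature and the TNF_* flags above fit together, here is a hypothetical caller sketch; last_nid, page_nid, nr_pages, migrated, shared and local are assumed inputs for illustration, not existing kernel symbols.

/* Hypothetical caller: build the TNF_* flags and report a NUMA
 * hinting fault. */
static void example_account_numa_fault(int last_nid, int page_nid,
				       int nr_pages, bool migrated,
				       bool shared, bool local)
{
	int flags = 0;

	if (migrated)
		flags |= TNF_MIGRATED;
	if (shared)
		flags |= TNF_SHARED;
	if (local)
		flags |= TNF_FAULT_LOCAL;

	task_numa_fault(last_nid, page_nid, nr_pages, flags);
}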
@@ -1532,6 +1597,8 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
* Test if a process is not yet dead (at most zombie state)
* If pid_alive fails, then pointers within the task structure
* can be stale and must not be dereferenced.
+ *
+ * Return: 1 if the process is alive. 0 otherwise.
*/
static inline int pid_alive(struct task_struct *p)
{
@@ -1543,6 +1610,8 @@ static inline int pid_alive(struct task_struct *p)
* @tsk: Task structure to be checked.
*
* Check if a task structure is the first user space task the kernel created.
+ *
+ * Return: 1 if the task structure is init. 0 otherwise.
*/
static inline int is_global_init(struct task_struct *tsk)
{
@@ -1894,6 +1963,8 @@ extern struct task_struct *idle_task(int cpu);
/**
* is_idle_task - is the specified task an idle task?
* @p: the task in question.
+ *
+ * Return: 1 if @p is an idle task. 0 otherwise.
*/
static inline bool is_idle_task(const struct task_struct *p)
{
@@ -1967,7 +2038,7 @@ extern void wake_up_new_task(struct task_struct *tsk);
#else
static inline void kick_process(struct task_struct *tsk) { }
#endif
-extern void sched_fork(struct task_struct *p);
+extern void sched_fork(unsigned long clone_flags, struct task_struct *p);
extern void sched_dead(struct task_struct *p);
extern void proc_caches_init(void);
@@ -2168,15 +2239,15 @@ static inline bool thread_group_leader(struct task_struct *p)
* all we care about is that we have a task with the appropriate
* pid, we don't actually care if we have the right task.
*/
-static inline int has_group_leader_pid(struct task_struct *p)
+static inline bool has_group_leader_pid(struct task_struct *p)
{
- return p->pid == p->tgid;
+ return task_pid(p) == p->signal->leader_pid;
}
static inline
-int same_thread_group(struct task_struct *p1, struct task_struct *p2)
+bool same_thread_group(struct task_struct *p1, struct task_struct *p2)
{
- return p1->tgid == p2->tgid;
+ return p1->signal == p2->signal;
}
static inline struct task_struct *next_thread(const struct task_struct *p)
@@ -2394,11 +2465,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)
return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}
-static inline int need_resched(void)
-{
- return unlikely(test_thread_flag(TIF_NEED_RESCHED));
-}
-
/*
* cond_resched() and cond_resched_lock(): latency reduction via
* explicit rescheduling in places that are safe. The return
@@ -2467,36 +2533,105 @@ static inline int tsk_is_polling(struct task_struct *p)
{
return task_thread_info(p)->status & TS_POLLING;
}
-static inline void current_set_polling(void)
+static inline void __current_set_polling(void)
{
current_thread_info()->status |= TS_POLLING;
}
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+ __current_set_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_task()
+ */
+ smp_mb();
+
+ return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
{
current_thread_info()->status &= ~TS_POLLING;
- smp_mb__after_clear_bit();
+}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+ __current_clr_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_task()
+ */
+ smp_mb();
+
+ return unlikely(tif_need_resched());
}
#elif defined(TIF_POLLING_NRFLAG)
static inline int tsk_is_polling(struct task_struct *p)
{
return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
}
-static inline void current_set_polling(void)
+
+static inline void __current_set_polling(void)
{
set_thread_flag(TIF_POLLING_NRFLAG);
}
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+ __current_set_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_task()
+ *
+ * XXX: assumes set/clear bit are identical barrier wise.
+ */
+ smp_mb__after_clear_bit();
+
+ return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
{
clear_thread_flag(TIF_POLLING_NRFLAG);
}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+ __current_clr_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_task()
+ */
+ smp_mb__after_clear_bit();
+
+ return unlikely(tif_need_resched());
+}
+
#else
static inline int tsk_is_polling(struct task_struct *p) { return 0; }
-static inline void current_set_polling(void) { }
-static inline void current_clr_polling(void) { }
+static inline void __current_set_polling(void) { }
+static inline void __current_clr_polling(void) { }
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+ return unlikely(tif_need_resched());
+}
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+ return unlikely(tif_need_resched());
+}
#endif
+static __always_inline bool need_resched(void)
+{
+ return unlikely(tif_need_resched());
+}
+
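[Editor's note] A hypothetical idle-poll loop (not taken from any arch or cpuidle driver) showing the intended pairing of the helpers above: announce polling, bail out if NEED_RESCHED was already set when the barrier took effect, otherwise spin until a wakeup sets it, then clear the polling state again.

static void example_poll_idle(void)
{
	if (current_set_polling_and_test()) {
		/* A resched was already pending; skip the poll loop. */
		__current_clr_polling();
		return;
	}

	while (!need_resched())
		cpu_relax();		/* wait for TIF_NEED_RESCHED */

	__current_clr_polling();
}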
/*
* Thread group CPU time accounting.
*/
@@ -2538,6 +2673,11 @@ static inline unsigned int task_cpu(const struct task_struct *p)
return task_thread_info(p)->cpu;
}
+static inline int task_node(const struct task_struct *p)
+{
+ return cpu_to_node(task_cpu(p));
+}
+
extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
#else