summaryrefslogtreecommitdiffstats
path: root/include/linux/sched.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/sched.h')
-rw-r--r--include/linux/sched.h218
1 files changed, 138 insertions, 80 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a781dec1cd0..ea74596014a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3,6 +3,8 @@
#include <uapi/linux/sched.h>
+#include <linux/sched/prio.h>
+
struct sched_param {
int sched_priority;
@@ -27,7 +29,7 @@ struct sched_param {
#include <asm/page.h>
#include <asm/ptrace.h>
-#include <asm/cputime.h>
+#include <linux/cputime.h>
#include <linux/smp.h>
#include <linux/sem.h>
@@ -130,11 +132,9 @@ struct perf_event_context;
struct blk_plug;
struct filename;
-/*
- * List of flags we want to share for kernel threads,
- * if only because they are not used by them anyway.
- */
-#define CLONE_KERNEL (CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
+#define VMACACHE_BITS 2
+#define VMACACHE_SIZE (1U << VMACACHE_BITS)
+#define VMACACHE_MASK (VMACACHE_SIZE - 1)
/*
* These are the constant used to fake the fixed-point load-average
@@ -204,8 +204,9 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
#define __TASK_STOPPED 4
#define __TASK_TRACED 8
/* in tsk->exit_state */
-#define EXIT_ZOMBIE 16
-#define EXIT_DEAD 32
+#define EXIT_DEAD 16
+#define EXIT_ZOMBIE 32
+#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
/* in tsk->state again */
#define TASK_DEAD 64
#define TASK_WAKEKILL 128
@@ -213,7 +214,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
#define TASK_PARKED 512
#define TASK_STATE_MAX 1024
-#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
+#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP"
extern char ___assert_task_state[1 - 2*!!(
sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
@@ -292,10 +293,14 @@ extern int runqueue_is_locked(int cpu);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
extern void nohz_balance_enter_idle(int cpu);
extern void set_cpu_sd_state_idle(void);
-extern int get_nohz_timer_target(void);
+extern int get_nohz_timer_target(int pinned);
#else
static inline void nohz_balance_enter_idle(int cpu) { }
static inline void set_cpu_sd_state_idle(void) { }
+static inline int get_nohz_timer_target(int pinned)
+{
+ return smp_processor_id();
+}
#endif
/*
@@ -734,7 +739,6 @@ static inline int signal_group_exit(const struct signal_struct *sig)
struct user_struct {
atomic_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */
- atomic_t files; /* How many open files does this user have? */
atomic_t sigpending; /* How many pending signals does this user have? */
#ifdef CONFIG_INOTIFY_USER
atomic_t inotify_watches; /* How many inotify watches does this user have? */
@@ -859,6 +863,7 @@ enum cpu_idle_type {
#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
#define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */
+#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */
#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
@@ -866,7 +871,26 @@ enum cpu_idle_type {
#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
#define SD_NUMA 0x4000 /* cross-node balancing */
-extern int __weak arch_sd_sibiling_asym_packing(void);
+#ifdef CONFIG_SCHED_SMT
+static inline const int cpu_smt_flags(void)
+{
+ return SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
+}
+#endif
+
+#ifdef CONFIG_SCHED_MC
+static inline const int cpu_core_flags(void)
+{
+ return SD_SHARE_PKG_RESOURCES;
+}
+#endif
+
+#ifdef CONFIG_NUMA
+static inline const int cpu_numa_flags(void)
+{
+ return SD_NUMA;
+}
+#endif
struct sched_domain_attr {
int relax_domain_level;
@@ -974,6 +998,38 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
bool cpus_share_cache(int this_cpu, int that_cpu);
+typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
+typedef const int (*sched_domain_flags_f)(void);
+
+#define SDTL_OVERLAP 0x01
+
+struct sd_data {
+ struct sched_domain **__percpu sd;
+ struct sched_group **__percpu sg;
+ struct sched_group_power **__percpu sgp;
+};
+
+struct sched_domain_topology_level {
+ sched_domain_mask_f mask;
+ sched_domain_flags_f sd_flags;
+ int flags;
+ int numa_level;
+ struct sd_data data;
+#ifdef CONFIG_SCHED_DEBUG
+ char *name;
+#endif
+};
+
+extern struct sched_domain_topology_level *sched_domain_topology;
+
+extern void set_sched_topology(struct sched_domain_topology_level *tl);
+
+#ifdef CONFIG_SCHED_DEBUG
+# define SD_INIT_NAME(type) .name = #type
+#else
+# define SD_INIT_NAME(type)
+#endif
+
#else /* CONFIG_SMP */
struct sched_domain_attr;
@@ -1077,6 +1133,7 @@ struct sched_entity {
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
+ int depth;
struct sched_entity *parent;
/* rq on which this entity is (to be) queued: */
struct cfs_rq *cfs_rq;
@@ -1111,8 +1168,8 @@ struct sched_dl_entity {
/*
* Original scheduling parameters. Copied here from sched_attr
- * during sched_setscheduler2(), they will remain the same until
- * the next sched_setscheduler2().
+ * during sched_setattr(), they will remain the same until
+ * the next sched_setattr().
*/
u64 dl_runtime; /* maximum runtime for each instance */
u64 dl_deadline; /* relative deadline of each instance */
@@ -1141,9 +1198,12 @@ struct sched_dl_entity {
*
* @dl_boosted tells if we are boosted due to DI. If so we are
* outside bandwidth enforcement mechanism (but only until we
- * exit the critical section).
+ * exit the critical section);
+ *
+ * @dl_yielded tells if task gave up the cpu before consuming
+ * all its available runtime during the last job.
*/
- int dl_throttled, dl_new, dl_boosted;
+ int dl_throttled, dl_new, dl_boosted, dl_yielded;
/*
* Bandwidth enforcement timer. Each -deadline task has its
@@ -1228,6 +1288,9 @@ struct task_struct {
#ifdef CONFIG_COMPAT_BRK
unsigned brk_randomized:1;
#endif
+ /* per-thread vma caching */
+ u32 vmacache_seqnum;
+ struct vm_area_struct *vmacache[VMACACHE_SIZE];
#if defined(SPLIT_RSS_COUNTING)
struct task_rss_stat rss_stat;
#endif
@@ -1460,6 +1523,9 @@ struct task_struct {
struct mutex perf_event_mutex;
struct list_head perf_event_list;
#endif
+#ifdef CONFIG_DEBUG_PREEMPT
+ unsigned long preempt_disable_ip;
+#endif
#ifdef CONFIG_NUMA
struct mempolicy *mempolicy; /* Protected by alloc_lock */
short il_next;
@@ -1470,9 +1536,10 @@ struct task_struct {
unsigned int numa_scan_period;
unsigned int numa_scan_period_max;
int numa_preferred_nid;
- int numa_migrate_deferred;
unsigned long numa_migrate_retry;
u64 node_stamp; /* migration stamp */
+ u64 last_task_numa_placement;
+ u64 last_sum_exec_runtime;
struct callback_head numa_work;
struct list_head numa_entry;
@@ -1483,15 +1550,22 @@ struct task_struct {
* Scheduling placement decisions are made based on the these counts.
* The values remain static for the duration of a PTE scan
*/
- unsigned long *numa_faults;
+ unsigned long *numa_faults_memory;
unsigned long total_numa_faults;
/*
* numa_faults_buffer records faults per node during the current
- * scan window. When the scan completes, the counts in numa_faults
- * decay and these values are copied.
+ * scan window. When the scan completes, the counts in
+ * numa_faults_memory decay and these values are copied.
*/
- unsigned long *numa_faults_buffer;
+ unsigned long *numa_faults_buffer_memory;
+
+ /*
+ * Track the nodes the process was running on when a NUMA hinting
+ * fault was incurred.
+ */
+ unsigned long *numa_faults_cpu;
+ unsigned long *numa_faults_buffer_cpu;
/*
* numa_faults_locality tracks if faults recorded during the last
@@ -1596,8 +1670,8 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags);
extern pid_t task_numa_group_id(struct task_struct *p);
extern void set_numabalancing_state(bool enabled);
extern void task_numa_free(struct task_struct *p);
-
-extern unsigned int sysctl_numa_balancing_migrate_deferred;
+extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
+ int src_nid, int dst_cpu);
#else
static inline void task_numa_fault(int last_node, int node, int pages,
int flags)
@@ -1613,6 +1687,11 @@ static inline void set_numabalancing_state(bool enabled)
static inline void task_numa_free(struct task_struct *p)
{
}
+static inline bool should_numa_migrate_memory(struct task_struct *p,
+ struct page *page, int src_nid, int dst_cpu)
+{
+ return true;
+}
#endif
static inline struct pid *task_pid(struct task_struct *task)
@@ -1688,6 +1767,24 @@ static inline pid_t task_tgid_vnr(struct task_struct *tsk)
}
+static inline int pid_alive(const struct task_struct *p);
+static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
+{
+ pid_t pid = 0;
+
+ rcu_read_lock();
+ if (pid_alive(tsk))
+ pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns);
+ rcu_read_unlock();
+
+ return pid;
+}
+
+static inline pid_t task_ppid_nr(const struct task_struct *tsk)
+{
+ return task_ppid_nr_ns(tsk, &init_pid_ns);
+}
+
static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk,
struct pid_namespace *ns)
{
@@ -1727,7 +1824,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
*
* Return: 1 if the process is alive. 0 otherwise.
*/
-static inline int pid_alive(struct task_struct *p)
+static inline int pid_alive(const struct task_struct *p)
{
return p->pids[PIDTYPE_PID].pid != NULL;
}
@@ -1821,7 +1918,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
-#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */
@@ -2080,7 +2176,16 @@ static inline void sched_autogroup_exit(struct signal_struct *sig) { }
extern bool yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
-extern int task_nice(const struct task_struct *p);
+/**
+ * task_nice - return the nice value of a given task.
+ * @p: the task in question.
+ *
+ * Return: The nice value [ -20 ... 0 ... 19 ].
+ */
+static inline int task_nice(const struct task_struct *p)
+{
+ return PRIO_TO_NICE((p)->static_prio);
+}
extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
@@ -2309,9 +2414,6 @@ extern void flush_itimer_signals(void);
extern void do_group_exit(int);
-extern int allow_signal(int);
-extern int disallow_signal(int);
-
extern int do_execve(struct filename *,
const char __user * const __user *,
const char __user * const __user *);
@@ -2319,7 +2421,7 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i
struct task_struct *fork_idle(int);
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-extern void set_task_comm(struct task_struct *tsk, char *from);
+extern void set_task_comm(struct task_struct *tsk, const char *from);
extern char *get_task_comm(char *to, struct task_struct *tsk);
#ifdef CONFIG_SMP
@@ -2663,51 +2765,9 @@ static inline int spin_needbreak(spinlock_t *lock)
/*
* Idle thread specific functions to determine the need_resched
- * polling state. We have two versions, one based on TS_POLLING in
- * thread_info.status and one based on TIF_POLLING_NRFLAG in
- * thread_info.flags
+ * polling state.
*/
-#ifdef TS_POLLING
-static inline int tsk_is_polling(struct task_struct *p)
-{
- return task_thread_info(p)->status & TS_POLLING;
-}
-static inline void __current_set_polling(void)
-{
- current_thread_info()->status |= TS_POLLING;
-}
-
-static inline bool __must_check current_set_polling_and_test(void)
-{
- __current_set_polling();
-
- /*
- * Polling state must be visible before we test NEED_RESCHED,
- * paired by resched_task()
- */
- smp_mb();
-
- return unlikely(tif_need_resched());
-}
-
-static inline void __current_clr_polling(void)
-{
- current_thread_info()->status &= ~TS_POLLING;
-}
-
-static inline bool __must_check current_clr_polling_and_test(void)
-{
- __current_clr_polling();
-
- /*
- * Polling state must be visible before we test NEED_RESCHED,
- * paired by resched_task()
- */
- smp_mb();
-
- return unlikely(tif_need_resched());
-}
-#elif defined(TIF_POLLING_NRFLAG)
+#ifdef TIF_POLLING_NRFLAG
static inline int tsk_is_polling(struct task_struct *p)
{
return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
@@ -2725,10 +2785,8 @@ static inline bool __must_check current_set_polling_and_test(void)
/*
* Polling state must be visible before we test NEED_RESCHED,
* paired by resched_task()
- *
- * XXX: assumes set/clear bit are identical barrier wise.
*/
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
return unlikely(tif_need_resched());
}
@@ -2746,7 +2804,7 @@ static inline bool __must_check current_clr_polling_and_test(void)
* Polling state must be visible before we test NEED_RESCHED,
* paired by resched_task()
*/
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
return unlikely(tif_need_resched());
}
@@ -2899,7 +2957,7 @@ static inline void inc_syscw(struct task_struct *tsk)
#define TASK_SIZE_OF(tsk) TASK_SIZE
#endif
-#ifdef CONFIG_MM_OWNER
+#ifdef CONFIG_MEMCG
extern void mm_update_next_owner(struct mm_struct *mm);
extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
#else
@@ -2910,7 +2968,7 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
{
}
-#endif /* CONFIG_MM_OWNER */
+#endif /* CONFIG_MEMCG */
static inline unsigned long task_rlimit(const struct task_struct *tsk,
unsigned int limit)