diff options
Diffstat (limited to 'include/linux/sched.h')
-rw-r--r-- | include/linux/sched.h | 217 |
1 files changed, 154 insertions, 63 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index 18d63cea284..20b03bf9474 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -315,7 +315,6 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; -extern int softlockup_thresh; void lockup_detector_init(void); #else static inline void touch_softlockup_watchdog(void) @@ -360,7 +359,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout); extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); asmlinkage void schedule(void); -extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); +extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); struct nsproxy; struct user_namespace; @@ -514,6 +513,7 @@ struct thread_group_cputimer { spinlock_t lock; }; +#include <linux/rwsem.h> struct autogroup; /* @@ -633,6 +633,16 @@ struct signal_struct { unsigned audit_tty; struct tty_audit_buf *tty_audit_buf; #endif +#ifdef CONFIG_CGROUPS + /* + * The threadgroup_fork_lock prevents threads from forking with + * CLONE_THREAD while held for writing. Use this for fork-sensitive + * threadgroup-wide operations. It's taken for reading in fork.c in + * copy_process(). + * Currently only needed write-side by cgroups. + */ + struct rw_semaphore threadgroup_fork_lock; +#endif int oom_adj; /* OOM kill score adjustment (bit shift) */ int oom_score_adj; /* OOM kill score adjustment */ @@ -653,9 +663,8 @@ struct signal_struct { * Bits in flags field of signal_struct. */ #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ -#define SIGNAL_STOP_DEQUEUED 0x00000002 /* stop signal dequeued */ -#define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */ -#define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */ +#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ +#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ /* * Pending notifications to parent. */ @@ -731,10 +740,6 @@ struct sched_info { /* timestamps */ unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ -#ifdef CONFIG_SCHEDSTATS - /* BKL stats */ - unsigned int bkl_count; -#endif }; #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ @@ -792,17 +797,39 @@ enum cpu_idle_type { }; /* - * sched-domains (multiprocessor balancing) declarations: + * Increase resolution of nice-level calculations for 64-bit architectures. + * The extra resolution improves shares distribution and load balancing of + * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup + * hierarchies, especially on larger systems. This is not a user-visible change + * and does not change the user-interface for setting shares/weights. + * + * We increase resolution only if we have enough bits to allow this increased + * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution + * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the + * increased costs. */ +#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */ +# define SCHED_LOAD_RESOLUTION 10 +# define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) +# define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION) +#else +# define SCHED_LOAD_RESOLUTION 0 +# define scale_load(w) (w) +# define scale_load_down(w) (w) +#endif -/* - * Increase resolution of nice-level calculations: - */ -#define SCHED_LOAD_SHIFT 10 +#define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION) #define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) -#define SCHED_LOAD_SCALE_FUZZ SCHED_LOAD_SCALE +/* + * Increase resolution of cpu_power calculations + */ +#define SCHED_POWER_SHIFT 10 +#define SCHED_POWER_SCALE (1L << SCHED_POWER_SHIFT) +/* + * sched-domains (multiprocessor balancing) declarations: + */ #ifdef CONFIG_SMP #define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ #define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ @@ -817,6 +844,7 @@ enum cpu_idle_type { #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ #define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ +#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ enum powersavings_balance_level { POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */ @@ -866,15 +894,21 @@ static inline int sd_power_saving_flags(void) return 0; } -struct sched_group { - struct sched_group *next; /* Must be a circular list */ - +struct sched_group_power { + atomic_t ref; /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a * single CPU. */ - unsigned int cpu_power, cpu_power_orig; + unsigned int power, power_orig; +}; + +struct sched_group { + struct sched_group *next; /* Must be a circular list */ + atomic_t ref; + unsigned int group_weight; + struct sched_group_power *sgp; /* * The CPUs this group covers. @@ -882,9 +916,6 @@ struct sched_group { * NOTE: this field is variable length. (Allocated dynamically * by attaching extra space to the end of the structure, * depending on how many CPUs the kernel has booted up with) - * - * It is also be embedded into static data structures at build - * time. (See 'struct static_sched_group' in kernel/sched.c) */ unsigned long cpumask[0]; }; @@ -894,17 +925,6 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) return to_cpumask(sg->cpumask); } -enum sched_domain_level { - SD_LV_NONE = 0, - SD_LV_SIBLING, - SD_LV_MC, - SD_LV_BOOK, - SD_LV_CPU, - SD_LV_NODE, - SD_LV_ALLNODES, - SD_LV_MAX -}; - struct sched_domain_attr { int relax_domain_level; }; @@ -913,6 +933,8 @@ struct sched_domain_attr { .relax_domain_level = -1, \ } +extern int sched_domain_level_max; + struct sched_domain { /* These fields must be setup */ struct sched_domain *parent; /* top domain must be null terminated */ @@ -930,7 +952,7 @@ struct sched_domain { unsigned int forkexec_idx; unsigned int smt_gain; int flags; /* See SD_* */ - enum sched_domain_level level; + int level; /* Runtime fields. */ unsigned long last_balance; /* init to jiffies. units in jiffies */ @@ -973,6 +995,10 @@ struct sched_domain { #ifdef CONFIG_SCHED_DEBUG char *name; #endif + union { + void *private; /* used during construction */ + struct rcu_head rcu; /* used during destruction */ + }; unsigned int span_weight; /* @@ -981,9 +1007,6 @@ struct sched_domain { * NOTE: this field is variable length. (Allocated dynamically * by attaching extra space to the end of the structure, * depending on how many CPUs the kernel has booted up with) - * - * It is also be embedded into static data structures at build - * time. (See 'struct static_sched_domain' in kernel/sched.c) */ unsigned long span[0]; }; @@ -1046,10 +1069,15 @@ struct sched_domain; */ #define WF_SYNC 0x01 /* waker goes to sleep after wakup */ #define WF_FORK 0x02 /* child wakeup after fork */ +#define WF_MIGRATED 0x04 /* internal use, task got migrated */ #define ENQUEUE_WAKEUP 1 -#define ENQUEUE_WAKING 2 -#define ENQUEUE_HEAD 4 +#define ENQUEUE_HEAD 2 +#ifdef CONFIG_SMP +#define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */ +#else +#define ENQUEUE_WAKING 0 +#endif #define DEQUEUE_SLEEP 1 @@ -1067,12 +1095,11 @@ struct sched_class { void (*put_prev_task) (struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP - int (*select_task_rq)(struct rq *rq, struct task_struct *p, - int sd_flag, int flags); + int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); - void (*task_waking) (struct rq *this_rq, struct task_struct *task); + void (*task_waking) (struct task_struct *task); void (*task_woken) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, @@ -1197,13 +1224,11 @@ struct task_struct { unsigned int flags; /* per process flags, defined below */ unsigned int ptrace; - int lock_depth; /* BKL lock depth */ - #ifdef CONFIG_SMP -#ifdef __ARCH_WANT_UNLOCKED_CTXSW - int oncpu; -#endif + struct task_struct *wake_entry; + int on_cpu; #endif + int on_rq; int prio, static_prio, normal_prio; unsigned int rt_priority; @@ -1235,6 +1260,9 @@ struct task_struct { #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; +#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) + int rcu_boosted; +#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */ struct list_head rcu_node_entry; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TREE_PREEMPT_RCU @@ -1264,6 +1292,7 @@ struct task_struct { int exit_state; int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ + unsigned int jobctl; /* JOBCTL_*, siglock protected */ /* ??? */ unsigned int personality; unsigned did_exec:1; @@ -1274,6 +1303,7 @@ struct task_struct { /* Revert to default priority/policy when forking */ unsigned sched_reset_on_fork:1; + unsigned sched_contributes_to_load:1; pid_t pid; pid_t tgid; @@ -1482,7 +1512,6 @@ struct task_struct { short il_next; short pref_node_fork; #endif - atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; /* @@ -1526,7 +1555,7 @@ struct task_struct { #ifdef CONFIG_TRACING /* state flags for use by tracers */ unsigned long trace; - /* bitmask of trace recursion */ + /* bitmask and counter of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ @@ -1537,6 +1566,9 @@ struct task_struct { unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ } memcg_batch; #endif +#ifdef CONFIG_HAVE_HW_BREAKPOINT + atomic_t ptrace_bp_refcnt; +#endif }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ @@ -1741,7 +1773,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ #define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_OOM_ORIGIN 0x00080000 /* Allocating much memory to others */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ @@ -1780,6 +1811,36 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) +/* + * task->jobctl flags + */ +#define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ + +#define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ +#define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ +#define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ +#define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ +#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ +#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ +#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ + +#define JOBCTL_STOP_DEQUEUED (1 << JOBCTL_STOP_DEQUEUED_BIT) +#define JOBCTL_STOP_PENDING (1 << JOBCTL_STOP_PENDING_BIT) +#define JOBCTL_STOP_CONSUME (1 << JOBCTL_STOP_CONSUME_BIT) +#define JOBCTL_TRAP_STOP (1 << JOBCTL_TRAP_STOP_BIT) +#define JOBCTL_TRAP_NOTIFY (1 << JOBCTL_TRAP_NOTIFY_BIT) +#define JOBCTL_TRAPPING (1 << JOBCTL_TRAPPING_BIT) +#define JOBCTL_LISTENING (1 << JOBCTL_LISTENING_BIT) + +#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) + +extern bool task_set_jobctl_pending(struct task_struct *task, + unsigned int mask); +extern void task_clear_jobctl_trapping(struct task_struct *task); +extern void task_clear_jobctl_pending(struct task_struct *task, + unsigned int mask); + #ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ @@ -1808,9 +1869,16 @@ static inline void rcu_copy_process(struct task_struct *p) #endif #ifdef CONFIG_SMP +extern void do_set_cpus_allowed(struct task_struct *p, + const struct cpumask *new_mask); + extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); #else +static inline void do_set_cpus_allowed(struct task_struct *p, + const struct cpumask *new_mask) +{ +} static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { @@ -2060,14 +2128,13 @@ extern void xtime_update(unsigned long ticks); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); -extern void wake_up_new_task(struct task_struct *tsk, - unsigned long clone_flags); +extern void wake_up_new_task(struct task_struct *tsk); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern void sched_fork(struct task_struct *p, int clone_flags); +extern void sched_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); @@ -2087,7 +2154,7 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s spin_unlock_irqrestore(&tsk->sighand->siglock, flags); return ret; -} +} extern void block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask); @@ -2102,7 +2169,7 @@ extern int kill_pid_info_as_uid(int, struct siginfo *, struct pid *, uid_t, uid_ extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern int kill_proc_info(int, struct siginfo *, pid_t); -extern int do_notify_parent(struct task_struct *, int); +extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); @@ -2192,8 +2259,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP +void scheduler_ipi(void); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else +static inline void scheduler_ipi(void) { } static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { @@ -2224,8 +2293,10 @@ static inline int get_nr_threads(struct task_struct *tsk) return tsk->signal->nr_threads; } -/* de_thread depends on thread_group_leader not being a pid based check */ -#define thread_group_leader(p) (p == p->group_leader) +static inline bool thread_group_leader(struct task_struct *p) +{ + return p->exit_signal >= 0; +} /* Do to the insanities of de_thread it is possible for a process * to have the pid of the thread group leader without actually being @@ -2258,11 +2329,6 @@ static inline int thread_group_empty(struct task_struct *p) #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) -static inline int task_detached(struct task_struct *p) -{ - return p->exit_signal == -1; -} - /* * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also @@ -2299,6 +2365,31 @@ static inline void unlock_task_sighand(struct task_struct *tsk, spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); } +/* See the declaration of threadgroup_fork_lock in signal_struct. */ +#ifdef CONFIG_CGROUPS +static inline void threadgroup_fork_read_lock(struct task_struct *tsk) +{ + down_read(&tsk->signal->threadgroup_fork_lock); +} +static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) +{ + up_read(&tsk->signal->threadgroup_fork_lock); +} +static inline void threadgroup_fork_write_lock(struct task_struct *tsk) +{ + down_write(&tsk->signal->threadgroup_fork_lock); +} +static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) +{ + up_write(&tsk->signal->threadgroup_fork_lock); +} +#else +static inline void threadgroup_fork_read_lock(struct task_struct *tsk) {} +static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) {} +static inline void threadgroup_fork_write_lock(struct task_struct *tsk) {} +static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) {} +#endif + #ifndef __HAVE_THREAD_FUNCTIONS #define task_thread_info(task) ((struct thread_info *)(task)->stack) @@ -2434,7 +2525,7 @@ extern int _cond_resched(void); extern int __cond_resched_lock(spinlock_t *lock); -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPT_COUNT #define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET #else #define PREEMPT_LOCK_OFFSET 0 |