diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-12 10:20:12 +0900 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-12 10:20:12 +0900 |
commit | 39cf275a1a18ba3c7eb9b986c5c9b35b57332798 (patch) | |
tree | 40b119ca9d2fbaf8128d3fa25f4c64669002b0c0 /include | |
parent | ad5d69899e52792671c1aa6c7360464c7edfe09c (diff) | |
parent | e5137b50a0640009fd63a3e65c14bc6e1be8796a (diff) |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar:
"The main changes in this cycle are:
- (much) improved CONFIG_NUMA_BALANCING support from Mel Gorman, Rik
van Riel, Peter Zijlstra et al. Yay!
- optimize preemption counter handling: merge the NEED_RESCHED flag
into the preempt_count variable, by Peter Zijlstra.
- wait.h fixes and code reorganization from Peter Zijlstra
- cfs_bandwidth fixes from Ben Segall
- SMP load-balancer cleanups from Peter Zijstra
- idle balancer improvements from Jason Low
- other fixes and cleanups"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (129 commits)
ftrace, sched: Add TRACE_FLAG_PREEMPT_RESCHED
stop_machine: Fix race between stop_two_cpus() and stop_cpus()
sched: Remove unnecessary iteration over sched domains to update nr_busy_cpus
sched: Fix asymmetric scheduling for POWER7
sched: Move completion code from core.c to completion.c
sched: Move wait code from core.c to wait.c
sched: Move wait.c into kernel/sched/
sched/wait: Fix __wait_event_interruptible_lock_irq_timeout()
sched: Avoid throttle_cfs_rq() racing with period_timer stopping
sched: Guarantee new group-entities always have weight
sched: Fix hrtimer_cancel()/rq->lock deadlock
sched: Fix cfs_bandwidth misuse of hrtimer_expires_remaining
sched: Fix race on toggling cfs_bandwidth_used
sched: Remove extra put_online_cpus() inside sched_setaffinity()
sched/rt: Fix task_tick_rt() comment
sched/wait: Fix build breakage
sched/wait: Introduce prepare_to_wait_event()
sched/wait: Add ___wait_cond_timeout() to wait_event*_timeout() too
sched: Remove get_online_cpus() usage
sched: Fix race in migrate_swap_stop()
...
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/preempt.h | 105 | ||||
-rw-r--r-- | include/linux/completion.h | 2 | ||||
-rw-r--r-- | include/linux/hardirq.h | 8 | ||||
-rw-r--r-- | include/linux/mempolicy.h | 1 | ||||
-rw-r--r-- | include/linux/migrate.h | 7 | ||||
-rw-r--r-- | include/linux/mm.h | 118 | ||||
-rw-r--r-- | include/linux/mm_types.h | 17 | ||||
-rw-r--r-- | include/linux/page-flags-layout.h | 28 | ||||
-rw-r--r-- | include/linux/preempt.h | 112 | ||||
-rw-r--r-- | include/linux/sched.h | 167 | ||||
-rw-r--r-- | include/linux/sched/sysctl.h | 1 | ||||
-rw-r--r-- | include/linux/stop_machine.h | 1 | ||||
-rw-r--r-- | include/linux/thread_info.h | 17 | ||||
-rw-r--r-- | include/linux/topology.h | 6 | ||||
-rw-r--r-- | include/linux/tty.h | 28 | ||||
-rw-r--r-- | include/linux/uaccess.h | 8 | ||||
-rw-r--r-- | include/linux/wait.h | 374 | ||||
-rw-r--r-- | include/trace/events/sched.h | 2 |
18 files changed, 600 insertions, 402 deletions
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h new file mode 100644 index 00000000000..ddf2b420ac8 --- /dev/null +++ b/include/asm-generic/preempt.h @@ -0,0 +1,105 @@ +#ifndef __ASM_PREEMPT_H +#define __ASM_PREEMPT_H + +#include <linux/thread_info.h> + +/* + * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users + * that think a non-zero value indicates we cannot preempt. + */ +static __always_inline int preempt_count(void) +{ + return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED; +} + +static __always_inline int *preempt_count_ptr(void) +{ + return ¤t_thread_info()->preempt_count; +} + +/* + * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the + * alternative is loosing a reschedule. Better schedule too often -- also this + * should be a very rare operation. + */ +static __always_inline void preempt_count_set(int pc) +{ + *preempt_count_ptr() = pc; +} + +/* + * must be macros to avoid header recursion hell + */ +#define task_preempt_count(p) \ + (task_thread_info(p)->preempt_count & ~PREEMPT_NEED_RESCHED) + +#define init_task_preempt_count(p) do { \ + task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \ +} while (0) + +#define init_idle_preempt_count(p, cpu) do { \ + task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ +} while (0) + +/* + * We fold the NEED_RESCHED bit into the preempt count such that + * preempt_enable() can decrement and test for needing to reschedule with a + * single instruction. + * + * We invert the actual bit, so that when the decrement hits 0 we know we both + * need to resched (the bit is cleared) and can resched (no preempt count). + */ + +static __always_inline void set_preempt_need_resched(void) +{ + *preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED; +} + +static __always_inline void clear_preempt_need_resched(void) +{ + *preempt_count_ptr() |= PREEMPT_NEED_RESCHED; +} + +static __always_inline bool test_preempt_need_resched(void) +{ + return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED); +} + +/* + * The various preempt_count add/sub methods + */ + +static __always_inline void __preempt_count_add(int val) +{ + *preempt_count_ptr() += val; +} + +static __always_inline void __preempt_count_sub(int val) +{ + *preempt_count_ptr() -= val; +} + +static __always_inline bool __preempt_count_dec_and_test(void) +{ + return !--*preempt_count_ptr(); +} + +/* + * Returns true when we need to resched and can (barring IRQ state). + */ +static __always_inline bool should_resched(void) +{ + return unlikely(!*preempt_count_ptr()); +} + +#ifdef CONFIG_PREEMPT +extern asmlinkage void preempt_schedule(void); +#define __preempt_schedule() preempt_schedule() + +#ifdef CONFIG_CONTEXT_TRACKING +extern asmlinkage void preempt_schedule_context(void); +#define __preempt_schedule_context() preempt_schedule_context() +#endif +#endif /* CONFIG_PREEMPT */ + +#endif /* __ASM_PREEMPT_H */ diff --git a/include/linux/completion.h b/include/linux/completion.h index 3cd574d5b19..22c33e35bcb 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -5,7 +5,7 @@ * (C) Copyright 2001 Linus Torvalds * * Atomic wait-for-completion handler data structures. - * See kernel/sched/core.c for details. + * See kernel/sched/completion.c for details. */ #include <linux/wait.h> diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 1e041063b22..d9cf963ac83 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -33,7 +33,7 @@ extern void rcu_nmi_exit(void); #define __irq_enter() \ do { \ account_irq_enter_time(current); \ - add_preempt_count(HARDIRQ_OFFSET); \ + preempt_count_add(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ } while (0) @@ -49,7 +49,7 @@ extern void irq_enter(void); do { \ trace_hardirq_exit(); \ account_irq_exit_time(current); \ - sub_preempt_count(HARDIRQ_OFFSET); \ + preempt_count_sub(HARDIRQ_OFFSET); \ } while (0) /* @@ -62,7 +62,7 @@ extern void irq_exit(void); lockdep_off(); \ ftrace_nmi_enter(); \ BUG_ON(in_nmi()); \ - add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ + preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ rcu_nmi_enter(); \ trace_hardirq_enter(); \ } while (0) @@ -72,7 +72,7 @@ extern void irq_exit(void); trace_hardirq_exit(); \ rcu_nmi_exit(); \ BUG_ON(!in_nmi()); \ - sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ + preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ ftrace_nmi_exit(); \ lockdep_on(); \ } while (0) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index da6716b9e3f..ea4d2495c64 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -136,6 +136,7 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, struct mempolicy *get_vma_policy(struct task_struct *tsk, struct vm_area_struct *vma, unsigned long addr); +bool vma_policy_mof(struct task_struct *task, struct vm_area_struct *vma); extern void numa_default_policy(void); extern void numa_policy_init(void); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 8d3c57fdf22..f5096b58b20 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -90,11 +90,12 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_NUMA_BALANCING -extern int migrate_misplaced_page(struct page *page, int node); -extern int migrate_misplaced_page(struct page *page, int node); +extern int migrate_misplaced_page(struct page *page, + struct vm_area_struct *vma, int node); extern bool migrate_ratelimited(int node); #else -static inline int migrate_misplaced_page(struct page *page, int node) +static inline int migrate_misplaced_page(struct page *page, + struct vm_area_struct *vma, int node) { return -EAGAIN; /* can't migrate now */ } diff --git a/include/linux/mm.h b/include/linux/mm.h index 8b6e55ee885..81443d557a2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -581,11 +581,11 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) * sets it, so none of the operations on it need to be atomic. */ -/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_NID] | ... | FLAGS | */ +/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) -#define LAST_NID_PGOFF (ZONES_PGOFF - LAST_NID_WIDTH) +#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) /* * Define the bit shifts to access each section. For non-existent @@ -595,7 +595,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) #define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) -#define LAST_NID_PGSHIFT (LAST_NID_PGOFF * (LAST_NID_WIDTH != 0)) +#define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0)) /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ #ifdef NODE_NOT_IN_PAGE_FLAGS @@ -617,7 +617,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) -#define LAST_NID_MASK ((1UL << LAST_NID_WIDTH) - 1) +#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_WIDTH) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) static inline enum zone_type page_zonenum(const struct page *page) @@ -661,51 +661,117 @@ static inline int page_to_nid(const struct page *page) #endif #ifdef CONFIG_NUMA_BALANCING -#ifdef LAST_NID_NOT_IN_PAGE_FLAGS -static inline int page_nid_xchg_last(struct page *page, int nid) +static inline int cpu_pid_to_cpupid(int cpu, int pid) { - return xchg(&page->_last_nid, nid); + return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK); } -static inline int page_nid_last(struct page *page) +static inline int cpupid_to_pid(int cpupid) { - return page->_last_nid; + return cpupid & LAST__PID_MASK; } -static inline void page_nid_reset_last(struct page *page) + +static inline int cpupid_to_cpu(int cpupid) { - page->_last_nid = -1; + return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK; } -#else -static inline int page_nid_last(struct page *page) + +static inline int cpupid_to_nid(int cpupid) { - return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK; + return cpu_to_node(cpupid_to_cpu(cpupid)); } -extern int page_nid_xchg_last(struct page *page, int nid); +static inline bool cpupid_pid_unset(int cpupid) +{ + return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK); +} -static inline void page_nid_reset_last(struct page *page) +static inline bool cpupid_cpu_unset(int cpupid) { - int nid = (1 << LAST_NID_SHIFT) - 1; + return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK); +} - page->flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT); - page->flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT; +static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) +{ + return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid); +} + +#define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid) +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS +static inline int page_cpupid_xchg_last(struct page *page, int cpupid) +{ + return xchg(&page->_last_cpupid, cpupid); +} + +static inline int page_cpupid_last(struct page *page) +{ + return page->_last_cpupid; +} +static inline void page_cpupid_reset_last(struct page *page) +{ + page->_last_cpupid = -1; } -#endif /* LAST_NID_NOT_IN_PAGE_FLAGS */ #else -static inline int page_nid_xchg_last(struct page *page, int nid) +static inline int page_cpupid_last(struct page *page) { - return page_to_nid(page); + return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; } -static inline int page_nid_last(struct page *page) +extern int page_cpupid_xchg_last(struct page *page, int cpupid); + +static inline void page_cpupid_reset_last(struct page *page) { - return page_to_nid(page); + int cpupid = (1 << LAST_CPUPID_SHIFT) - 1; + + page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT); + page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT; +} +#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ +#else /* !CONFIG_NUMA_BALANCING */ +static inline int page_cpupid_xchg_last(struct page *page, int cpupid) +{ + return page_to_nid(page); /* XXX */ } -static inline void page_nid_reset_last(struct page *page) +static inline int page_cpupid_last(struct page *page) { + return page_to_nid(page); /* XXX */ } -#endif + +static inline int cpupid_to_nid(int cpupid) +{ + return -1; +} + +static inline int cpupid_to_pid(int cpupid) +{ + return -1; +} + +static inline int cpupid_to_cpu(int cpupid) +{ + return -1; +} + +static inline int cpu_pid_to_cpupid(int nid, int pid) +{ + return -1; +} + +static inline bool cpupid_pid_unset(int cpupid) +{ + return 1; +} + +static inline void page_cpupid_reset_last(struct page *page) +{ +} + +static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) +{ + return false; +} +#endif /* CONFIG_NUMA_BALANCING */ static inline struct zone *page_zone(const struct page *page) { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d9851eeb6e1..a3198e5aaf4 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -174,8 +174,8 @@ struct page { void *shadow; #endif -#ifdef LAST_NID_NOT_IN_PAGE_FLAGS - int _last_nid; +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS + int _last_cpupid; #endif } /* @@ -420,28 +420,15 @@ struct mm_struct { */ unsigned long numa_next_scan; - /* numa_next_reset is when the PTE scanner period will be reset */ - unsigned long numa_next_reset; - /* Restart point for scanning and setting pte_numa */ unsigned long numa_scan_offset; /* numa_scan_seq prevents two threads setting pte_numa */ int numa_scan_seq; - - /* - * The first node a task was scheduled on. If a task runs on - * a different node than Make PTE Scan Go Now. - */ - int first_nid; #endif struct uprobes_state uprobes_state; }; -/* first nid will either be a valid NID or one of these values */ -#define NUMA_PTE_SCAN_INIT -1 -#define NUMA_PTE_SCAN_ACTIVE -2 - static inline void mm_init_cpumask(struct mm_struct *mm) { #ifdef CONFIG_CPUMASK_OFFSTACK diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 93506a11403..da523661500 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -38,10 +38,10 @@ * The last is when there is insufficient space in page->flags and a separate * lookup is necessary. * - * No sparsemem or sparsemem vmemmap: | NODE | ZONE | ... | FLAGS | - * " plus space for last_nid: | NODE | ZONE | LAST_NID ... | FLAGS | - * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS | - * " plus space for last_nid: | SECTION | NODE | ZONE | LAST_NID ... | FLAGS | + * No sparsemem or sparsemem vmemmap: | NODE | ZONE | ... | FLAGS | + * " plus space for last_cpupid: | NODE | ZONE | LAST_CPUPID ... | FLAGS | + * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS | + * " plus space for last_cpupid: | SECTION | NODE | ZONE | LAST_CPUPID ... | FLAGS | * classic sparse no space for node: | SECTION | ZONE | ... | FLAGS | */ #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) @@ -62,15 +62,21 @@ #endif #ifdef CONFIG_NUMA_BALANCING -#define LAST_NID_SHIFT NODES_SHIFT +#define LAST__PID_SHIFT 8 +#define LAST__PID_MASK ((1 << LAST__PID_SHIFT)-1) + +#define LAST__CPU_SHIFT NR_CPUS_BITS +#define LAST__CPU_MASK ((1 << LAST__CPU_SHIFT)-1) + +#define LAST_CPUPID_SHIFT (LAST__PID_SHIFT+LAST__CPU_SHIFT) #else -#define LAST_NID_SHIFT 0 +#define LAST_CPUPID_SHIFT 0 #endif -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_NID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS -#define LAST_NID_WIDTH LAST_NID_SHIFT +#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else -#define LAST_NID_WIDTH 0 +#define LAST_CPUPID_WIDTH 0 #endif /* @@ -81,8 +87,8 @@ #define NODE_NOT_IN_PAGE_FLAGS #endif -#if defined(CONFIG_NUMA_BALANCING) && LAST_NID_WIDTH == 0 -#define LAST_NID_NOT_IN_PAGE_FLAGS +#if defined(CONFIG_NUMA_BALANCING) && LAST_CPUPID_WIDTH == 0 +#define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif #endif /* _LINUX_PAGE_FLAGS_LAYOUT */ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index f5d4723cdb3..a3d9dc8c2c0 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -6,106 +6,95 @@ * preempt_count (used for kernel preemption, interrupt count, etc.) */ -#include <linux/thread_info.h> #include <linux/linkage.h> #include <linux/list.h> -#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) - extern void add_preempt_count(int val); - extern void sub_preempt_count(int val); -#else -# define add_preempt_count(val) do { preempt_count() += (val); } while (0) -# define sub_preempt_count(val) do { preempt_count() -= (val); } while (0) -#endif - -#define inc_preempt_count() add_preempt_count(1) -#define dec_preempt_count() sub_preempt_count(1) - -#define preempt_count() (current_thread_info()->preempt_count) - -#ifdef CONFIG_PREEMPT - -asmlinkage void preempt_schedule(void); - -#define preempt_check_resched() \ -do { \ - if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ - preempt_schedule(); \ -} while (0) - -#ifdef CONFIG_CONTEXT_TRACKING +/* + * We use the MSB mostly because its available; see <linux/preempt_mask.h> for + * the other bits -- can't include that header due to inclusion hell. + */ +#define PREEMPT_NEED_RESCHED 0x80000000 -void preempt_schedule_context(void); +#include <asm/preempt.h> -#define preempt_check_resched_context() \ -do { \ - if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ - preempt_schedule_context(); \ -} while (0) +#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) +extern void preempt_count_add(int val); +extern void preempt_count_sub(int val); +#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); }) #else +#define preempt_count_add(val) __preempt_count_add(val) +#define preempt_count_sub(val) __preempt_count_sub(val) +#define preempt_count_dec_and_test() __preempt_count_dec_and_test() +#endif -#define preempt_check_resched_context() preempt_check_resched() - -#endif /* CONFIG_CONTEXT_TRACKING */ - -#else /* !CONFIG_PREEMPT */ - -#define preempt_check_resched() do { } while (0) -#define preempt_check_resched_context() do { } while (0) - -#endif /* CONFIG_PREEMPT */ +#define __preempt_count_inc() __preempt_count_add(1) +#define __preempt_count_dec() __preempt_count_sub(1) +#define preempt_count_inc() preempt_count_add(1) +#define preempt_count_dec() preempt_count_sub(1) #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ do { \ - inc_preempt_count(); \ + preempt_count_inc(); \ barrier(); \ } while (0) #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ - dec_preempt_count(); \ + preempt_count_dec(); \ } while (0) -#define preempt_enable_no_resched() sched_preempt_enable_no_resched() +#define preempt_enable_no_resched() sched_preempt_enable_no_resched() +#ifdef CONFIG_PREEMPT #define preempt_enable() \ do { \ - preempt_enable_no_resched(); \ barrier(); \ - preempt_check_resched(); \ + if (unlikely(preempt_count_dec_and_test())) \ + __preempt_schedule(); \ +} while (0) + +#define preempt_check_resched() \ +do { \ + if (should_resched()) \ + __preempt_schedule(); \ } while (0) -/* For debugging and tracer internals only! */ -#define add_preempt_count_notrace(val) \ - do { preempt_count() += (val); } while (0) -#define sub_preempt_count_notrace(val) \ - do { preempt_count() -= (val); } while (0) -#define inc_preempt_count_notrace() add_preempt_count_notrace(1) -#define dec_preempt_count_notrace() sub_preempt_count_notrace(1) +#else +#define preempt_enable() preempt_enable_no_resched() +#define preempt_check_resched() do { } while (0) +#endif #define preempt_disable_notrace() \ do { \ - inc_preempt_count_notrace(); \ + __preempt_count_inc(); \ barrier(); \ } while (0) #define preempt_enable_no_resched_notrace() \ do { \ barrier(); \ - dec_preempt_count_notrace(); \ + __preempt_count_dec(); \ } while (0) -/* preempt_check_resched is OK to trace */ +#ifdef CONFIG_PREEMPT + +#ifndef CONFIG_CONTEXT_TRACKING +#define __preempt_schedule_context() __preempt_schedule() +#endif + #define preempt_enable_notrace() \ do { \ - preempt_enable_no_resched_notrace(); \ barrier(); \ - preempt_check_resched_context(); \ + if (unlikely(__preempt_count_dec_and_test())) \ + __preempt_schedule_context(); \ } while (0) +#else +#define preempt_enable_notrace() preempt_enable_no_resched_notrace() +#endif #else /* !CONFIG_PREEMPT_COUNT */ @@ -115,10 +104,11 @@ do { \ * that can cause faults and scheduling migrate into our preempt-protected * region. */ -#define preempt_disable() barrier() +#define preempt_disable() barrier() #define sched_preempt_enable_no_resched() barrier() -#define preempt_enable_no_resched() barrier() -#define preempt_enable() barrier() +#define preempt_enable_no_resched() barrier() +#define preempt_enable() barrier() +#define preempt_check_resched() do { } while (0) #define preempt_disable_notrace() barrier() #define preempt_enable_no_resched_notrace() barrier() diff --git a/include/linux/sched.h b/include/linux/sched.h index e27baeeda3f..045b0d22784 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -22,6 +22,7 @@ struct sched_param { #include <linux/errno.h> #include <linux/nodemask.h> #include <linux/mm_types.h> +#include <linux/preempt.h> #include <asm/page.h> #include <asm/ptrace.h> @@ -427,6 +428,14 @@ struct task_cputime { .sum_exec_runtime = 0, \ } +#define PREEMPT_ENABLED (PREEMPT_NEED_RESCHED) + +#ifdef CONFIG_PREEMPT_COUNT +#define PREEMPT_DISABLED (1 + PREEMPT_ENABLED) +#else +#define PREEMPT_DISABLED PREEMPT_ENABLED +#endif + /* * Disable preemption until the scheduler is running. * Reset by start_kernel()->sched_init()->init_idle(). @@ -434,7 +443,7 @@ struct task_cputime { * We include PREEMPT_ACTIVE to avoid cond_resched() from working * before the scheduler is active -- see should_resched(). */ -#define INIT_PREEMPT_COUNT (1 + PREEMPT_ACTIVE) +#define INIT_PREEMPT_COUNT (PREEMPT_DISABLED + PREEMPT_ACTIVE) /** * struct thread_group_cputimer - thread group interval timer counts @@ -768,6 +777,7 @@ enum cpu_idle_type { #define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ #define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ +#define SD_NUMA 0x4000 /* cross-node balancing */ extern int __weak arch_sd_sibiling_asym_packing(void); @@ -811,6 +821,10 @@ struct sched_domain { u64 last_update; + /* idle_balance() stats */ + u64 max_newidle_lb_cost; + unsigned long next_decay_max_lb_cost; + #ifdef CONFIG_SCHEDSTATS /* load_balance() stats */ unsigned int lb_count[CPU_MAX_IDLE_TYPES]; @@ -1029,6 +1043,8 @@ struct task_struct { struct task_struct *last_wakee; unsigned long wakee_flips; unsigned long wakee_flip_decay_ts; + + int wake_cpu; #endif int on_rq; @@ -1324,10 +1340,41 @@ struct task_struct { #endif #ifdef CONFIG_NUMA_BALANCING int numa_scan_seq; - int numa_migrate_seq; unsigned int numa_scan_period; + unsigned int numa_scan_period_max; + int numa_preferred_nid; + int numa_migrate_deferred; + unsigned long numa_migrate_retry; u64 node_stamp; /* migration stamp */ struct callback_head numa_work; + + struct list_head numa_entry; + struct numa_group *numa_group; + + /* + * Exponential decaying average of faults on a per-node basis. + * Scheduling placement decisions are made based on the these counts. + * The values remain static for the duration of a PTE scan + */ + unsigned long *numa_faults; + unsigned long total_numa_faults; + + /* + * numa_faults_buffer records faults per node during the current + * scan window. When the scan completes, the counts in numa_faults + * decay and these values are copied. + */ + unsigned long *numa_faults_buffer; + + /* + * numa_faults_locality tracks if faults recorded during the last + * scan window were remote/local. The task scan period is adapted + * based on the locality of the faults with different weights + * depending on whether they were shared or private faults + */ + unsigned long numa_faults_locality[2]; + + unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ struct rcu_head rcu; @@ -1412,16 +1459,33 @@ struct task_struct { /* Future-safe accessor for struct task_struct's cpus_allowed. */ #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) +#define TNF_MIGRATED 0x01 +#define TNF_NO_GROUP 0x02 +#define TNF_SHARED 0x04 +#define TNF_FAULT_LOCAL 0x08 + #ifdef CONFIG_NUMA_BALANCING -extern void task_numa_fault(int node, int pages, bool migrated); +extern void task_numa_fault(int last_node, int node, int pages, int flags); +extern pid_t task_numa_group_id(struct task_struct *p); extern void set_numabalancing_state(bool enabled); +extern void task_numa_free(struct task_struct *p); + +extern unsigned int sysctl_numa_balancing_migrate_deferred; #else -static inline void task_numa_fault(int node, int pages, bool migrated) +static inline void task_numa_fault(int last_node, int node, int pages, + int flags) { } +static inline pid_t task_numa_group_id(struct task_struct *p) +{ + return 0; +} static inline void set_numabalancing_state(bool enabled) { } +static inline void task_numa_free(struct task_struct *p) +{ +} #endif static inline struct pid *task_pid(struct task_struct *task) @@ -1974,7 +2038,7 @@ extern void wake_up_new_task(struct task_struct *tsk); #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern void sched_fork(struct task_struct *p); +extern void sched_fork(unsigned long clone_flags, struct task_struct *p); extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); @@ -2401,11 +2465,6 @@ static inline int signal_pending_state(long state, struct task_struct *p) return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } -static inline int need_resched(void) -{ - return unlikely(test_thread_flag(TIF_NEED_RESCHED)); -} - /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return @@ -2474,36 +2533,105 @@ static inline int tsk_is_polling(struct task_struct *p) { return task_thread_info(p)->status & TS_POLLING; } -static inline void current_set_polling(void) +static inline void __current_set_polling(void) { current_thread_info()->status |= TS_POLLING; } -static inline void current_clr_polling(void) +static inline bool __must_check current_set_polling_and_test(void) +{ + __current_set_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_task() + */ + smp_mb(); + + return unlikely(tif_need_resched()); +} + +static inline void __current_clr_polling(void) { current_thread_info()->status &= ~TS_POLLING; - smp_mb__after_clear_bit(); +} + +static inline bool __must_check current_clr_polling_and_test(void) +{ + __current_clr_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_task() + */ + smp_mb(); + + return unlikely(tif_need_resched()); } #elif defined(TIF_POLLING_NRFLAG) static inline int tsk_is_polling(struct task_struct *p) { return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); } -static inline void current_set_polling(void) + +static inline void __current_set_polling(void) { set_thread_flag(TIF_POLLING_NRFLAG); } -static inline void current_clr_polling(void) +static inline bool __must_check current_set_polling_and_test(void) +{ + __current_set_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_task() + * + * XXX: assumes set/clear bit are identical barrier wise. + */ + smp_mb__after_clear_bit(); + + return unlikely(tif_need_resched()); +} + +static inline void __current_clr_polling(void) { clear_thread_flag(TIF_POLLING_NRFLAG); } + +static inline bool __must_check current_clr_polling_and_test(void) +{ + __current_clr_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_task() + */ + smp_mb__after_clear_bit(); + + return unlikely(tif_need_resched()); +} + #else static inline int tsk_is_polling(struct task_struct *p) { return 0; } -static inline void current_set_polling(void) { } -static inline void current_clr_polling(void) { } +static inline void __current_set_polling(void) { } +static inline void __current_clr_polling(void) { } + +static inline bool __must_check current_set_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} +static inline bool __must_check current_clr_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} #endif +static __always_inline bool need_resched(void) +{ + return unlikely(tif_need_resched()); +} + /* * Thread group CPU time accounting. */ @@ -2545,6 +2673,11 @@ static inline unsigned int task_cpu(const struct task_struct *p) return task_thread_info(p)->cpu; } +static inline int task_node(const struct task_struct *p) +{ + return cpu_to_node(task_cpu(p)); +} + extern void set_task_cpu(struct task_struct *p, unsigned int cpu); #else diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index bf8086b2506..10d16c4fbe8 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -47,7 +47,6 @@ extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; extern unsigned int sysctl_numa_balancing_scan_delay; extern unsigned int sysctl_numa_balancing_scan_period_min; extern unsigned int sysctl_numa_balancing_scan_period_max; -extern unsigned int sysctl_numa_balancing_scan_period_reset; extern unsigned int sysctl_numa_balancing_scan_size; extern unsigned int sysctl_numa_balancing_settle_count; diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 3b5e910d14c..d2abbdb8c6a 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -28,6 +28,7 @@ struct cpu_stop_work { }; int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg); +int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg); void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, struct cpu_stop_work *work_buf); int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e7e04736802..fddbe2023a5 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -104,8 +104,21 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) #define test_thread_flag(flag) \ test_ti_thread_flag(current_thread_info(), flag) -#define set_need_resched() set_thread_flag(TIF_NEED_RESCHED) -#define clear_need_resched() clear_thread_flag(TIF_NEED_RESCHED) +static inline __deprecated void set_need_resched(void) +{ + /* + * Use of this function in deprecated. + * + * As of this writing there are only a few users in the DRM tree left + * all of which are wrong and can be removed without causing too much + * grief. + * + * The DRM people are aware and are working on removing the last few + * instances. + */ +} + +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK /* diff --git a/include/linux/topology.h b/include/linux/topology.h index d3cf0d6e771..12ae6ce997d 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -106,6 +106,8 @@ int arch_update_cpu_topology(void); .last_balance = jiffies, \ .balance_interval = 1, \ .smt_gain = 1178, /* 15% */ \ + .max_newidle_lb_cost = 0, \ + .next_decay_max_lb_cost = jiffies, \ } #endif #endif /* CONFIG_SCHED_SMT */ @@ -135,6 +137,8 @@ int arch_update_cpu_topology(void); , \ .last_balance = jiffies, \ .balance_interval = 1, \ + .max_newidle_lb_cost = 0, \ + .next_decay_max_lb_cost = jiffies, \ } #endif #endif /* CONFIG_SCHED_MC */ @@ -166,6 +170,8 @@ int arch_update_cpu_topology(void); , \ .last_balance = jiffies, \ .balance_interval = 1, \ + .max_newidle_lb_cost = 0, \ + .next_decay_max_lb_cost = jiffies, \ } #endif diff --git a/include/linux/tty.h b/include/linux/tty.h index 2f47989d828..97d660ed70c 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -671,31 +671,17 @@ static inline void tty_wait_until_sent_from_close(struct tty_struct *tty, #define wait_event_interruptible_tty(tty, wq, condition) \ ({ \ int __ret = 0; \ - if (!(condition)) { \ - __wait_event_interruptible_tty(tty, wq, condition, __ret); \ - } \ + if (!(condition)) \ + __ret = __wait_event_interruptible_tty(tty, wq, \ + condition); \ __ret; \ }) -#define __wait_event_interruptible_tty(tty, wq, condition, ret) \ -do { \ - DEFINE_WAIT(__wait); \ - \ - for (;;) { \ - prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \ - if (condition) \ - break; \ - if (!signal_pending(current)) { \ - tty_unlock(tty); \ +#define __wait_event_interruptible_tty(tty, wq, condition) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ + tty_unlock(tty); \ schedule(); \ - tty_lock(tty); \ - continue; \ - } \ - ret = -ERESTARTSYS; \ - break; \ - } \ - finish_wait(&wq, &__wait); \ -} while (0) + tty_lock(tty)) #ifdef CONFIG_PROC_FS extern void proc_tty_register_driver(struct tty_driver *); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5ca0951e185..9d8cf056e66 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -15,7 +15,7 @@ */ static inline void pagefault_disable(void) { - inc_preempt_count(); + preempt_count_inc(); /* * make sure to have issued the store before a pagefault * can hit. @@ -30,11 +30,7 @@ static inline void pagefault_enable(void) * the pagefault handler again. */ barrier(); - dec_preempt_count(); - /* - * make sure we do.. - */ - barrier(); + preempt_count_dec(); preempt_check_resched(); } diff --git a/include/linux/wait.h b/include/linux/wait.h index a67fc163559..61939ba30aa 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -1,7 +1,8 @@ #ifndef _LINUX_WAIT_H #define _LINUX_WAIT_H - - +/* + * Linux wait queue related types and methods + */ #include <linux/list.h> #include <linux/stddef.h> #include <linux/spinlock.h> @@ -13,27 +14,27 @@ typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, v int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key); struct __wait_queue { - unsigned int flags; + unsigned int flags; #define WQ_FLAG_EXCLUSIVE 0x01 - void *private; - wait_queue_func_t func; - struct list_head task_list; + void *private; + wait_queue_func_t func; + struct list_head task_list; }; struct wait_bit_key { - void *flags; - int bit_nr; -#define WAIT_ATOMIC_T_BIT_NR -1 + void *flags; + int bit_nr; +#define WAIT_ATOMIC_T_BIT_NR -1 }; struct wait_bit_queue { - struct wait_bit_key key; - wait_queue_t wait; + struct wait_bit_key key; + wait_queue_t wait; }; struct __wait_queue_head { - spinlock_t lock; - struct list_head task_list; + spinlock_t lock; + struct list_head task_list; }; typedef struct __wait_queue_head wait_queue_head_t; @@ -84,17 +85,17 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p) { - q->flags = 0; - q->private = p; - q->func = default_wake_function; + q->flags = 0; + q->private = p; + q->func = default_wake_function; } -static inline void init_waitqueue_func_entry(wait_queue_t *q, - wait_queue_func_t func) +static inline void +init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func) { - q->flags = 0; - q->private = NULL; - q->func = func; + q->flags = 0; + q->private = NULL; + q->func = func; } static inline int waitqueue_active(wait_queue_head_t *q) @@ -114,8 +115,8 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) /* * Used for wake-one threads: */ -static inline void __add_wait_queue_exclusive(wait_queue_head_t *q, - wait_queue_t *wait) +static inline void +__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) { wait->flags |= WQ_FLAG_EXCLUSIVE; __add_wait_queue(q, wait); @@ -127,23 +128,22 @@ static inline void __add_wait_queue_tail(wait_queue_head_t *head, list_add_tail(&new->task_list, &head->task_list); } -static inline void __add_wait_queue_tail_exclusive(wait_queue_head_t *q, - wait_queue_t *wait) +static inline void +__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait) { wait->flags |= WQ_FLAG_EXCLUSIVE; __add_wait_queue_tail(q, wait); } -static inline void __remove_wait_queue(wait_queue_head_t *head, - wait_queue_t *old) +static inline void +__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) { list_del(&old->task_list); } void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); -void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, - void *key); +void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr); void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); void __wake_up_bit(wait_queue_head_t *, void *, int); @@ -170,27 +170,64 @@ wait_queue_head_t *bit_waitqueue(void *, int); /* * Wakeup macros to be used to report events to the targets. */ -#define wake_up_poll(x, m) \ +#define wake_up_poll(x, m) \ __wake_up(x, TASK_NORMAL, 1, (void *) (m)) -#define wake_up_locked_poll(x, m) \ +#define wake_up_locked_poll(x, m) \ __wake_up_locked_key((x), TASK_NORMAL, (void *) (m)) -#define wake_up_interruptible_poll(x, m) \ +#define wake_up_interruptible_poll(x, m) \ __wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m)) #define wake_up_interruptible_sync_poll(x, m) \ __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m)) -#define __wait_event(wq, condition) \ -do { \ - DEFINE_WAIT(__wait); \ +#define ___wait_cond_timeout(condition) \ +({ \ + bool __cond = (condition); \ + if (__cond && !__ret) \ + __ret = 1; \ + __cond || !__ret; \ +}) + +#define ___wait_is_interruptible(state) \ + (!__builtin_constant_p(state) || \ + state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \ + +#define ___wait_event(wq, condition, state, exclusive, ret, cmd) \ +({ \ + __label__ __out; \ + wait_queue_t __wait; \ + long __ret = ret; \ + \ + INIT_LIST_HEAD(&__wait.task_list); \ + if (exclusive) \ + __wait.flags = WQ_FLAG_EXCLUSIVE; \ + else \ + __wait.flags = 0; \ \ for (;;) { \ - prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \ + long __int = prepare_to_wait_event(&wq, &__wait, state);\ + \ if (condition) \ break; \ - schedule(); \ + \ + if (___wait_is_interruptible(state) && __int) { \ + __ret = __int; \ + if (exclusive) { \ + abort_exclusive_wait(&wq, &__wait, \ + state, NULL); \ + goto __out; \ + } \ + break; \ + } \ + \ + cmd; \ } \ finish_wait(&wq, &__wait); \ -} while (0) +__out: __ret; \ +}) + +#define __wait_event(wq, condition) \ + (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + schedule()) /** * wait_event - sleep until a condition gets true @@ -204,29 +241,17 @@ do { \ * wake_up() has to be called after changing any variable that could * change the result of the wait condition. */ -#define wait_event(wq, condition) \ +#define wait_event(wq, condition) \ do { \ - if (condition) \ + if (condition) \ break; \ __wait_event(wq, condition); \ } while (0) -#define __wait_event_timeout(wq, condition, ret) \ -do { \ - DEFINE_WAIT(__wait); \ - \ - for (;;) { \ - prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \ - if (condition) \ - break; \ - ret = schedule_timeout(ret); \ - if (!ret) \ - break; \ - } \ - if (!ret && (condition)) \ - ret = 1; \ - finish_wait(&wq, &__wait); \ -} while (0) +#define __wait_event_timeout(wq, condition, timeout) \ + ___wait_event(wq, ___wait_cond_timeout(condition), \ + TASK_UNINTERRUPTIBLE, 0, timeout, \ + __ret = schedule_timeout(__ret)) /** * wait_event_timeout - sleep until a condition gets true or a timeout elapses @@ -248,28 +273,14 @@ do { \ #define wait_event_timeout(wq, condition, timeout) \ ({ \ long __ret = timeout; \ - if (!(condition)) \ - __wait_event_timeout(wq, condition, __ret); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_timeout(wq, condition, timeout); \ __ret; \ }) -#define __wait_event_interruptible(wq, condition, ret) \ -do { \ - DEFINE_WAIT(__wait); \ - \ - for (;;) { \ - prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \ - if (condition) \ - break; \ - if (!signal_pending(current)) { \ - schedule(); \ - continue; \ - } \ - ret = -ERESTARTSYS; \ - break; \ - } \ - finish_wait(&wq, &__wait); \ -} while (0) +#define __wait_event_interruptible(wq, condition) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ + schedule()) /** * wait_event_interruptible - sleep until a condition gets true @@ -290,31 +301,14 @@ do { \ ({ \ int __ret = 0; \ if (!(condition)) \ - __wait_event_interruptible(wq, condition, __ret); \ + __ret = __wait_event_interruptible(wq, condition); \ __ret; \ }) -#define __wait_event_interruptible_timeout(wq, condition, ret) \ -do { \ - DEFINE_WAIT(__wait); \ - \ - for (;;) { \ - prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \ - if (condition) \ - break; \ - if (!signal_pending(current)) { \ - ret = schedule_timeout(ret); \ - if (!ret) \ - break; \ - continue; \ - } \ - ret = -ERESTARTSYS; \ - break; \ - } \ - if (!ret && (condition)) \ - ret = 1; \ - finish_wait(&wq, &__wait); \ -} while (0) +#define __wait_event_interruptible_timeout(wq, condition, timeout) \ + ___wait_event(wq, ___wait_cond_timeout(condition), \ + TASK_INTERRUPTIBLE, 0, timeout, \ + __ret = schedule_timeout(__ret)) /** * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses @@ -337,15 +331,15 @@ do { \ #define wait_event_interruptible_timeout(wq, condition, timeout) \ ({ \ long __ret = timeout; \ - if (!(condition)) \ - __wait_event_interruptible_timeout(wq, condition, __ret); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_interruptible_timeout(wq, \ + condition, timeout); \ __ret; \ }) #define __wait_event_hrtimeout(wq, condition, timeout, state) \ ({ \ int __ret = 0; \ - DEFINE_WAIT(__wait); \ struct hrtimer_sleeper __t; \ \ hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, \ @@ -356,25 +350,15 @@ do { \ current->timer_slack_ns, \ HRTIMER_MODE_REL); \ \ - for (;;) { \ - prepare_to_wait(&wq, &__wait, state); \ - if (condition) \ - break; \ - if (state == TASK_INTERRUPTIBLE && \ - signal_pending(current)) { \ - __ret = -ERESTARTSYS; \ - break; \ - } \ + __ret = ___wait_event(wq, condition, state, 0, 0, \ if (!__t.task) { \ __ret = -ETIME; \ break; \ } \ - schedule(); \ - } \ + schedule()); \ \ hrtimer_cancel(&__t.timer); \ destroy_hrtimer_on_stack(&__t.timer); \ - finish_wait(&wq, &__wait); \ __ret; \ }) @@ -428,33 +412,15 @@ do { \ __ret; \ }) -#define __wait_event_interruptible_exclusive(wq, condition, ret) \ -do { \ - DEFINE_WAIT(__wait); \ - \ - for (;;) { \ - prepare_to_wait_exclusive(&wq, &__wait, \ - TASK_INTERRUPTIBLE); \ - if (condition) { \ - finish_wait(&wq, &__wait); \ - break; \ - } \ - if (!signal_pending(current)) { \ - schedule(); \ - continue; \ - } \ - ret = -ERESTARTSYS; \ - abort_exclusive_wait(&wq, &__wait, \ - TASK_INTERRUPTIBLE, NULL); \ - break; \ - } \ -} while (0) +#define __wait_event_interruptible_exclusive(wq, condition) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \ + schedule()) #define wait_event_interruptible_exclusive(wq, condition) \ ({ \ int __ret = 0; \ if (!(condition)) \ - __wait_event_interruptible_exclusive(wq, condition, __ret);\ + __ret = __wait_event_interruptible_exclusive(wq, condition);\ __ret; \ }) @@ -606,24 +572,8 @@ do { \ ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1)) - -#define __wait_event_killable(wq, condition, ret) \ -do { \ - DEFINE_WAIT(__wait); \ - \ - for (;;) { \ - prepare_to_wait(&wq, &__wait, TASK_KILLABLE); \ - if (condition) \ - break; \ - if (!fatal_signal_pending(current)) { \ - schedule(); \ - continue; \ - } \ - ret = -ERESTARTSYS; \ - break; \ - } \ - finish_wait(&wq, &__wait); \ -} while (0) +#define __wait_event_killable(wq, condition) \ + ___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule()) /** * wait_event_killable - sleep until a condition gets true @@ -644,26 +594,17 @@ do { \ ({ \ int __ret = 0; \ if (!(condition)) \ - __wait_event_killable(wq, condition, __ret); \ + __ret = __wait_event_killable(wq, condition); \ __ret; \ }) #define __wait_event_lock_irq(wq, condition, lock, cmd) \ -do { \ - DEFINE_WAIT(__wait); \ - \ - for (;;) { \ - prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \ - if (condition) \ - break; \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ - spin_lock_irq(&lock); \ - } \ - finish_wait(&wq, &__wait); \ -} while (0) + (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + spin_lock_irq(&lock)) /** * wait_event_lock_irq_cmd - sleep until a condition gets true. The @@ -723,26 +664,12 @@ do { \ } while (0) -#define __wait_event_interruptible_lock_irq(wq, condition, \ - lock, ret, cmd) \ -do { \ - DEFINE_WAIT(__wait); \ - \ - for (;;) { \ - prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \ - if (condition) \ - break; \ - if (signal_pending(current)) { \ - ret = -ERESTARTSYS; \ - break; \ - } \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ - spin_lock_irq(&lock); \ - } \ - finish_wait(&wq, &__wait); \ -} while (0) +#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + spin_lock_irq(&lock)) /** * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true. @@ -772,10 +699,9 @@ do { \ #define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \ ({ \ int __ret = 0; \ - \ if (!(condition)) \ - __wait_event_interruptible_lock_irq(wq, condition, \ - lock, __ret, cmd); \ + __ret = __wait_event_interruptible_lock_irq(wq, \ + condition, lock, cmd); \ __ret; \ }) @@ -804,39 +730,24 @@ do { \ #define wait_event_interruptible_lock_irq(wq, condition, lock) \ ({ \ int __ret = 0; \ - \ if (!(condition)) \ - __wait_event_interruptible_lock_irq(wq, condition, \ - lock, __ret, ); \ + __ret = __wait_event_interruptible_lock_irq(wq, \ + condition, lock,); \ __ret; \ }) #define __wait_event_interruptible_lock_irq_timeout(wq, condition, \ - lock, ret) \ -do { \ - DEFINE_WAIT(__wait); \ - \ - for (;;) { \ - prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \ - if (condition) \ - break; \ - if (signal_pending(current)) { \ - ret = -ERESTARTSYS; \ - break; \ - } \ - spin_unlock_irq(&lock); \ - ret = schedule_timeout(ret); \ - spin_lock_irq(&lock); \ - if (!ret) \ - break; \ - } \ - finish_wait(&wq, &__wait); \ -} while (0) + lock, timeout) \ + ___wait_event(wq, ___wait_cond_timeout(condition), \ + TASK_INTERRUPTIBLE, 0, timeout, \ + spin_unlock_irq(&lock); \ + __ret = schedule_timeout(__ret); \ + spin_lock_irq(&lock)); /** - * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets true or a timeout elapses. - * The condition is checked under the lock. This is expected - * to be called with the lock taken. + * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets + * true or a timeout elapses. The condition is checked under + * the lock. This is expected to be called with the lock taken. * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * @lock: a locked spinlock_t, which will be released before schedule() @@ -860,11 +771,10 @@ do { \ #define wait_event_interruptible_lock_irq_timeout(wq, condition, lock, \ timeout) \ ({ \ - int __ret = timeout; \ - \ - if (!(condition)) \ - __wait_event_interruptible_lock_irq_timeout( \ - wq, condition, lock, __ret); \ + long __ret = timeout; \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_interruptible_lock_irq_timeout( \ + wq, condition, lock, timeout); \ __ret; \ }) @@ -875,20 +785,18 @@ do { \ * We plan to remove these interfaces. */ extern void sleep_on(wait_queue_head_t *q); -extern long sleep_on_timeout(wait_queue_head_t *q, - signed long timeout); +extern long sleep_on_timeout(wait_queue_head_t *q, signed long timeout); extern void interruptible_sleep_on(wait_queue_head_t *q); -extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, - signed long timeout); +extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, signed long timeout); /* * Waitqueues which are removed from the waitqueue_head at wakeup time */ void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state); void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state); +long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state); void finish_wait(wait_queue_head_t *q, wait_queue_t *wait); -void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, - unsigned int mode, void *key); +void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key); int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); @@ -934,8 +842,8 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); * One uses wait_on_bit() where one is waiting for the bit to clear, * but has no intention of setting it. */ -static inline int wait_on_bit(void *word, int bit, - int (*action)(void *), unsigned mode) +static inline int +wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode) { if (!test_bit(bit, word)) return 0; @@ -958,8 +866,8 @@ static inline int wait_on_bit(void *word, int bit, * One uses wait_on_bit_lock() where one is waiting for the bit to * clear with the intention of setting it, and when done, clearing it. */ -static inline int wait_on_bit_lock(void *word, int bit, - int (*action)(void *), unsigned mode) +static inline int +wait_on_bit_lock(void *word, int bit, int (*action)(void *), unsigned mode) { if (!test_and_set_bit(bit, word)) return 0; @@ -983,5 +891,5 @@ int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode) return 0; return out_of_line_wait_on_atomic_t(val, action, mode); } - -#endif + +#endif /* _LINUX_WAIT_H */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 2e7d9947a10..613381bcde4 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -100,7 +100,7 @@ static inline long __trace_sched_switch_state(struct task_struct *p) /* * For all intents and purposes a preempted task is a running task. */ - if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE) + if (task_preempt_count(p) & PREEMPT_ACTIVE) state = TASK_RUNNING | TASK_STATE_MAX; #endif |