summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-11-12 10:20:12 +0900
committerLinus Torvalds <torvalds@linux-foundation.org>2013-11-12 10:20:12 +0900
commit39cf275a1a18ba3c7eb9b986c5c9b35b57332798 (patch)
tree40b119ca9d2fbaf8128d3fa25f4c64669002b0c0 /include
parentad5d69899e52792671c1aa6c7360464c7edfe09c (diff)
parente5137b50a0640009fd63a3e65c14bc6e1be8796a (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar: "The main changes in this cycle are: - (much) improved CONFIG_NUMA_BALANCING support from Mel Gorman, Rik van Riel, Peter Zijlstra et al. Yay! - optimize preemption counter handling: merge the NEED_RESCHED flag into the preempt_count variable, by Peter Zijlstra. - wait.h fixes and code reorganization from Peter Zijlstra - cfs_bandwidth fixes from Ben Segall - SMP load-balancer cleanups from Peter Zijstra - idle balancer improvements from Jason Low - other fixes and cleanups" * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (129 commits) ftrace, sched: Add TRACE_FLAG_PREEMPT_RESCHED stop_machine: Fix race between stop_two_cpus() and stop_cpus() sched: Remove unnecessary iteration over sched domains to update nr_busy_cpus sched: Fix asymmetric scheduling for POWER7 sched: Move completion code from core.c to completion.c sched: Move wait code from core.c to wait.c sched: Move wait.c into kernel/sched/ sched/wait: Fix __wait_event_interruptible_lock_irq_timeout() sched: Avoid throttle_cfs_rq() racing with period_timer stopping sched: Guarantee new group-entities always have weight sched: Fix hrtimer_cancel()/rq->lock deadlock sched: Fix cfs_bandwidth misuse of hrtimer_expires_remaining sched: Fix race on toggling cfs_bandwidth_used sched: Remove extra put_online_cpus() inside sched_setaffinity() sched/rt: Fix task_tick_rt() comment sched/wait: Fix build breakage sched/wait: Introduce prepare_to_wait_event() sched/wait: Add ___wait_cond_timeout() to wait_event*_timeout() too sched: Remove get_online_cpus() usage sched: Fix race in migrate_swap_stop() ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/preempt.h105
-rw-r--r--include/linux/completion.h2
-rw-r--r--include/linux/hardirq.h8
-rw-r--r--include/linux/mempolicy.h1
-rw-r--r--include/linux/migrate.h7
-rw-r--r--include/linux/mm.h118
-rw-r--r--include/linux/mm_types.h17
-rw-r--r--include/linux/page-flags-layout.h28
-rw-r--r--include/linux/preempt.h112
-rw-r--r--include/linux/sched.h167
-rw-r--r--include/linux/sched/sysctl.h1
-rw-r--r--include/linux/stop_machine.h1
-rw-r--r--include/linux/thread_info.h17
-rw-r--r--include/linux/topology.h6
-rw-r--r--include/linux/tty.h28
-rw-r--r--include/linux/uaccess.h8
-rw-r--r--include/linux/wait.h374
-rw-r--r--include/trace/events/sched.h2
18 files changed, 600 insertions, 402 deletions
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
new file mode 100644
index 00000000000..ddf2b420ac8
--- /dev/null
+++ b/include/asm-generic/preempt.h
@@ -0,0 +1,105 @@
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <linux/thread_info.h>
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
+static __always_inline int preempt_count(void)
+{
+ return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline int *preempt_count_ptr(void)
+{
+ return &current_thread_info()->preempt_count;
+}
+
+/*
+ * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the
+ * alternative is loosing a reschedule. Better schedule too often -- also this
+ * should be a very rare operation.
+ */
+static __always_inline void preempt_count_set(int pc)
+{
+ *preempt_count_ptr() = pc;
+}
+
+/*
+ * must be macros to avoid header recursion hell
+ */
+#define task_preempt_count(p) \
+ (task_thread_info(p)->preempt_count & ~PREEMPT_NEED_RESCHED)
+
+#define init_task_preempt_count(p) do { \
+ task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
+} while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+ task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
+} while (0)
+
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+ *preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+ *preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+ return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
+}
+
+/*
+ * The various preempt_count add/sub methods
+ */
+
+static __always_inline void __preempt_count_add(int val)
+{
+ *preempt_count_ptr() += val;
+}
+
+static __always_inline void __preempt_count_sub(int val)
+{
+ *preempt_count_ptr() -= val;
+}
+
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+ return !--*preempt_count_ptr();
+}
+
+/*
+ * Returns true when we need to resched and can (barring IRQ state).
+ */
+static __always_inline bool should_resched(void)
+{
+ return unlikely(!*preempt_count_ptr());
+}
+
+#ifdef CONFIG_PREEMPT
+extern asmlinkage void preempt_schedule(void);
+#define __preempt_schedule() preempt_schedule()
+
+#ifdef CONFIG_CONTEXT_TRACKING
+extern asmlinkage void preempt_schedule_context(void);
+#define __preempt_schedule_context() preempt_schedule_context()
+#endif
+#endif /* CONFIG_PREEMPT */
+
+#endif /* __ASM_PREEMPT_H */
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 3cd574d5b19..22c33e35bcb 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -5,7 +5,7 @@
* (C) Copyright 2001 Linus Torvalds
*
* Atomic wait-for-completion handler data structures.
- * See kernel/sched/core.c for details.
+ * See kernel/sched/completion.c for details.
*/
#include <linux/wait.h>
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 1e041063b22..d9cf963ac83 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -33,7 +33,7 @@ extern void rcu_nmi_exit(void);
#define __irq_enter() \
do { \
account_irq_enter_time(current); \
- add_preempt_count(HARDIRQ_OFFSET); \
+ preempt_count_add(HARDIRQ_OFFSET); \
trace_hardirq_enter(); \
} while (0)
@@ -49,7 +49,7 @@ extern void irq_enter(void);
do { \
trace_hardirq_exit(); \
account_irq_exit_time(current); \
- sub_preempt_count(HARDIRQ_OFFSET); \
+ preempt_count_sub(HARDIRQ_OFFSET); \
} while (0)
/*
@@ -62,7 +62,7 @@ extern void irq_exit(void);
lockdep_off(); \
ftrace_nmi_enter(); \
BUG_ON(in_nmi()); \
- add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
+ preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
rcu_nmi_enter(); \
trace_hardirq_enter(); \
} while (0)
@@ -72,7 +72,7 @@ extern void irq_exit(void);
trace_hardirq_exit(); \
rcu_nmi_exit(); \
BUG_ON(!in_nmi()); \
- sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
+ preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
ftrace_nmi_exit(); \
lockdep_on(); \
} while (0)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index da6716b9e3f..ea4d2495c64 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -136,6 +136,7 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
struct mempolicy *get_vma_policy(struct task_struct *tsk,
struct vm_area_struct *vma, unsigned long addr);
+bool vma_policy_mof(struct task_struct *task, struct vm_area_struct *vma);
extern void numa_default_policy(void);
extern void numa_policy_init(void);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 8d3c57fdf22..f5096b58b20 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -90,11 +90,12 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
#endif /* CONFIG_MIGRATION */
#ifdef CONFIG_NUMA_BALANCING
-extern int migrate_misplaced_page(struct page *page, int node);
-extern int migrate_misplaced_page(struct page *page, int node);
+extern int migrate_misplaced_page(struct page *page,
+ struct vm_area_struct *vma, int node);
extern bool migrate_ratelimited(int node);
#else
-static inline int migrate_misplaced_page(struct page *page, int node)
+static inline int migrate_misplaced_page(struct page *page,
+ struct vm_area_struct *vma, int node)
{
return -EAGAIN; /* can't migrate now */
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b6e55ee885..81443d557a2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -581,11 +581,11 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
* sets it, so none of the operations on it need to be atomic.
*/
-/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_NID] | ... | FLAGS | */
+/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */
#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
#define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH)
#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH)
-#define LAST_NID_PGOFF (ZONES_PGOFF - LAST_NID_WIDTH)
+#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH)
/*
* Define the bit shifts to access each section. For non-existent
@@ -595,7 +595,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
#define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
#define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0))
#define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0))
-#define LAST_NID_PGSHIFT (LAST_NID_PGOFF * (LAST_NID_WIDTH != 0))
+#define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0))
/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
#ifdef NODE_NOT_IN_PAGE_FLAGS
@@ -617,7 +617,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1)
#define NODES_MASK ((1UL << NODES_WIDTH) - 1)
#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1)
-#define LAST_NID_MASK ((1UL << LAST_NID_WIDTH) - 1)
+#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_WIDTH) - 1)
#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1)
static inline enum zone_type page_zonenum(const struct page *page)
@@ -661,51 +661,117 @@ static inline int page_to_nid(const struct page *page)
#endif
#ifdef CONFIG_NUMA_BALANCING
-#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
-static inline int page_nid_xchg_last(struct page *page, int nid)
+static inline int cpu_pid_to_cpupid(int cpu, int pid)
{
- return xchg(&page->_last_nid, nid);
+ return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);
}
-static inline int page_nid_last(struct page *page)
+static inline int cpupid_to_pid(int cpupid)
{
- return page->_last_nid;
+ return cpupid & LAST__PID_MASK;
}
-static inline void page_nid_reset_last(struct page *page)
+
+static inline int cpupid_to_cpu(int cpupid)
{
- page->_last_nid = -1;
+ return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK;
}
-#else
-static inline int page_nid_last(struct page *page)
+
+static inline int cpupid_to_nid(int cpupid)
{
- return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK;
+ return cpu_to_node(cpupid_to_cpu(cpupid));
}
-extern int page_nid_xchg_last(struct page *page, int nid);
+static inline bool cpupid_pid_unset(int cpupid)
+{
+ return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK);
+}
-static inline void page_nid_reset_last(struct page *page)
+static inline bool cpupid_cpu_unset(int cpupid)
{
- int nid = (1 << LAST_NID_SHIFT) - 1;
+ return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK);
+}
- page->flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT);
- page->flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT;
+static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid)
+{
+ return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid);
+}
+
+#define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid)
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
+{
+ return xchg(&page->_last_cpupid, cpupid);
+}
+
+static inline int page_cpupid_last(struct page *page)
+{
+ return page->_last_cpupid;
+}
+static inline void page_cpupid_reset_last(struct page *page)
+{
+ page->_last_cpupid = -1;
}
-#endif /* LAST_NID_NOT_IN_PAGE_FLAGS */
#else
-static inline int page_nid_xchg_last(struct page *page, int nid)
+static inline int page_cpupid_last(struct page *page)
{
- return page_to_nid(page);
+ return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK;
}
-static inline int page_nid_last(struct page *page)
+extern int page_cpupid_xchg_last(struct page *page, int cpupid);
+
+static inline void page_cpupid_reset_last(struct page *page)
{
- return page_to_nid(page);
+ int cpupid = (1 << LAST_CPUPID_SHIFT) - 1;
+
+ page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT);
+ page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT;
+}
+#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */
+#else /* !CONFIG_NUMA_BALANCING */
+static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
+{
+ return page_to_nid(page); /* XXX */
}
-static inline void page_nid_reset_last(struct page *page)
+static inline int page_cpupid_last(struct page *page)
{
+ return page_to_nid(page); /* XXX */
}
-#endif
+
+static inline int cpupid_to_nid(int cpupid)
+{
+ return -1;
+}
+
+static inline int cpupid_to_pid(int cpupid)
+{
+ return -1;
+}
+
+static inline int cpupid_to_cpu(int cpupid)
+{
+ return -1;
+}
+
+static inline int cpu_pid_to_cpupid(int nid, int pid)
+{
+ return -1;
+}
+
+static inline bool cpupid_pid_unset(int cpupid)
+{
+ return 1;
+}
+
+static inline void page_cpupid_reset_last(struct page *page)
+{
+}
+
+static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
+{
+ return false;
+}
+#endif /* CONFIG_NUMA_BALANCING */
static inline struct zone *page_zone(const struct page *page)
{
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d9851eeb6e1..a3198e5aaf4 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -174,8 +174,8 @@ struct page {
void *shadow;
#endif
-#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
- int _last_nid;
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+ int _last_cpupid;
#endif
}
/*
@@ -420,28 +420,15 @@ struct mm_struct {
*/
unsigned long numa_next_scan;
- /* numa_next_reset is when the PTE scanner period will be reset */
- unsigned long numa_next_reset;
-
/* Restart point for scanning and setting pte_numa */
unsigned long numa_scan_offset;
/* numa_scan_seq prevents two threads setting pte_numa */
int numa_scan_seq;
-
- /*
- * The first node a task was scheduled on. If a task runs on
- * a different node than Make PTE Scan Go Now.
- */
- int first_nid;
#endif
struct uprobes_state uprobes_state;
};
-/* first nid will either be a valid NID or one of these values */
-#define NUMA_PTE_SCAN_INIT -1
-#define NUMA_PTE_SCAN_ACTIVE -2
-
static inline void mm_init_cpumask(struct mm_struct *mm)
{
#ifdef CONFIG_CPUMASK_OFFSTACK
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
index 93506a11403..da523661500 100644
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -38,10 +38,10 @@
* The last is when there is insufficient space in page->flags and a separate
* lookup is necessary.
*
- * No sparsemem or sparsemem vmemmap: | NODE | ZONE | ... | FLAGS |
- * " plus space for last_nid: | NODE | ZONE | LAST_NID ... | FLAGS |
- * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS |
- * " plus space for last_nid: | SECTION | NODE | ZONE | LAST_NID ... | FLAGS |
+ * No sparsemem or sparsemem vmemmap: | NODE | ZONE | ... | FLAGS |
+ * " plus space for last_cpupid: | NODE | ZONE | LAST_CPUPID ... | FLAGS |
+ * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS |
+ * " plus space for last_cpupid: | SECTION | NODE | ZONE | LAST_CPUPID ... | FLAGS |
* classic sparse no space for node: | SECTION | ZONE | ... | FLAGS |
*/
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
@@ -62,15 +62,21 @@
#endif
#ifdef CONFIG_NUMA_BALANCING
-#define LAST_NID_SHIFT NODES_SHIFT
+#define LAST__PID_SHIFT 8
+#define LAST__PID_MASK ((1 << LAST__PID_SHIFT)-1)
+
+#define LAST__CPU_SHIFT NR_CPUS_BITS
+#define LAST__CPU_MASK ((1 << LAST__CPU_SHIFT)-1)
+
+#define LAST_CPUPID_SHIFT (LAST__PID_SHIFT+LAST__CPU_SHIFT)
#else
-#define LAST_NID_SHIFT 0
+#define LAST_CPUPID_SHIFT 0
#endif
-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_NID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
-#define LAST_NID_WIDTH LAST_NID_SHIFT
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
#else
-#define LAST_NID_WIDTH 0
+#define LAST_CPUPID_WIDTH 0
#endif
/*
@@ -81,8 +87,8 @@
#define NODE_NOT_IN_PAGE_FLAGS
#endif
-#if defined(CONFIG_NUMA_BALANCING) && LAST_NID_WIDTH == 0
-#define LAST_NID_NOT_IN_PAGE_FLAGS
+#if defined(CONFIG_NUMA_BALANCING) && LAST_CPUPID_WIDTH == 0
+#define LAST_CPUPID_NOT_IN_PAGE_FLAGS
#endif
#endif /* _LINUX_PAGE_FLAGS_LAYOUT */
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index f5d4723cdb3..a3d9dc8c2c0 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -6,106 +6,95 @@
* preempt_count (used for kernel preemption, interrupt count, etc.)
*/
-#include <linux/thread_info.h>
#include <linux/linkage.h>
#include <linux/list.h>
-#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
- extern void add_preempt_count(int val);
- extern void sub_preempt_count(int val);
-#else
-# define add_preempt_count(val) do { preempt_count() += (val); } while (0)
-# define sub_preempt_count(val) do { preempt_count() -= (val); } while (0)
-#endif
-
-#define inc_preempt_count() add_preempt_count(1)
-#define dec_preempt_count() sub_preempt_count(1)
-
-#define preempt_count() (current_thread_info()->preempt_count)
-
-#ifdef CONFIG_PREEMPT
-
-asmlinkage void preempt_schedule(void);
-
-#define preempt_check_resched() \
-do { \
- if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
- preempt_schedule(); \
-} while (0)
-
-#ifdef CONFIG_CONTEXT_TRACKING
+/*
+ * We use the MSB mostly because its available; see <linux/preempt_mask.h> for
+ * the other bits -- can't include that header due to inclusion hell.
+ */
+#define PREEMPT_NEED_RESCHED 0x80000000
-void preempt_schedule_context(void);
+#include <asm/preempt.h>
-#define preempt_check_resched_context() \
-do { \
- if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
- preempt_schedule_context(); \
-} while (0)
+#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
+extern void preempt_count_add(int val);
+extern void preempt_count_sub(int val);
+#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); })
#else
+#define preempt_count_add(val) __preempt_count_add(val)
+#define preempt_count_sub(val) __preempt_count_sub(val)
+#define preempt_count_dec_and_test() __preempt_count_dec_and_test()
+#endif
-#define preempt_check_resched_context() preempt_check_resched()
-
-#endif /* CONFIG_CONTEXT_TRACKING */
-
-#else /* !CONFIG_PREEMPT */
-
-#define preempt_check_resched() do { } while (0)
-#define preempt_check_resched_context() do { } while (0)
-
-#endif /* CONFIG_PREEMPT */
+#define __preempt_count_inc() __preempt_count_add(1)
+#define __preempt_count_dec() __preempt_count_sub(1)
+#define preempt_count_inc() preempt_count_add(1)
+#define preempt_count_dec() preempt_count_sub(1)
#ifdef CONFIG_PREEMPT_COUNT
#define preempt_disable() \
do { \
- inc_preempt_count(); \
+ preempt_count_inc(); \
barrier(); \
} while (0)
#define sched_preempt_enable_no_resched() \
do { \
barrier(); \
- dec_preempt_count(); \
+ preempt_count_dec(); \
} while (0)
-#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
+#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
+#ifdef CONFIG_PREEMPT
#define preempt_enable() \
do { \
- preempt_enable_no_resched(); \
barrier(); \
- preempt_check_resched(); \
+ if (unlikely(preempt_count_dec_and_test())) \
+ __preempt_schedule(); \
+} while (0)
+
+#define preempt_check_resched() \
+do { \
+ if (should_resched()) \
+ __preempt_schedule(); \
} while (0)
-/* For debugging and tracer internals only! */
-#define add_preempt_count_notrace(val) \
- do { preempt_count() += (val); } while (0)
-#define sub_preempt_count_notrace(val) \
- do { preempt_count() -= (val); } while (0)
-#define inc_preempt_count_notrace() add_preempt_count_notrace(1)
-#define dec_preempt_count_notrace() sub_preempt_count_notrace(1)
+#else
+#define preempt_enable() preempt_enable_no_resched()
+#define preempt_check_resched() do { } while (0)
+#endif
#define preempt_disable_notrace() \
do { \
- inc_preempt_count_notrace(); \
+ __preempt_count_inc(); \
barrier(); \
} while (0)
#define preempt_enable_no_resched_notrace() \
do { \
barrier(); \
- dec_preempt_count_notrace(); \
+ __preempt_count_dec(); \
} while (0)
-/* preempt_check_resched is OK to trace */
+#ifdef CONFIG_PREEMPT
+
+#ifndef CONFIG_CONTEXT_TRACKING
+#define __preempt_schedule_context() __preempt_schedule()
+#endif
+
#define preempt_enable_notrace() \
do { \
- preempt_enable_no_resched_notrace(); \
barrier(); \
- preempt_check_resched_context(); \
+ if (unlikely(__preempt_count_dec_and_test())) \
+ __preempt_schedule_context(); \
} while (0)
+#else
+#define preempt_enable_notrace() preempt_enable_no_resched_notrace()
+#endif
#else /* !CONFIG_PREEMPT_COUNT */
@@ -115,10 +104,11 @@ do { \
* that can cause faults and scheduling migrate into our preempt-protected
* region.
*/
-#define preempt_disable() barrier()
+#define preempt_disable() barrier()
#define sched_preempt_enable_no_resched() barrier()
-#define preempt_enable_no_resched() barrier()
-#define preempt_enable() barrier()
+#define preempt_enable_no_resched() barrier()
+#define preempt_enable() barrier()
+#define preempt_check_resched() do { } while (0)
#define preempt_disable_notrace() barrier()
#define preempt_enable_no_resched_notrace() barrier()
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e27baeeda3f..045b0d22784 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,6 +22,7 @@ struct sched_param {
#include <linux/errno.h>
#include <linux/nodemask.h>
#include <linux/mm_types.h>
+#include <linux/preempt.h>
#include <asm/page.h>
#include <asm/ptrace.h>
@@ -427,6 +428,14 @@ struct task_cputime {
.sum_exec_runtime = 0, \
}
+#define PREEMPT_ENABLED (PREEMPT_NEED_RESCHED)
+
+#ifdef CONFIG_PREEMPT_COUNT
+#define PREEMPT_DISABLED (1 + PREEMPT_ENABLED)
+#else
+#define PREEMPT_DISABLED PREEMPT_ENABLED
+#endif
+
/*
* Disable preemption until the scheduler is running.
* Reset by start_kernel()->sched_init()->init_idle().
@@ -434,7 +443,7 @@ struct task_cputime {
* We include PREEMPT_ACTIVE to avoid cond_resched() from working
* before the scheduler is active -- see should_resched().
*/
-#define INIT_PREEMPT_COUNT (1 + PREEMPT_ACTIVE)
+#define INIT_PREEMPT_COUNT (PREEMPT_DISABLED + PREEMPT_ACTIVE)
/**
* struct thread_group_cputimer - thread group interval timer counts
@@ -768,6 +777,7 @@ enum cpu_idle_type {
#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
+#define SD_NUMA 0x4000 /* cross-node balancing */
extern int __weak arch_sd_sibiling_asym_packing(void);
@@ -811,6 +821,10 @@ struct sched_domain {
u64 last_update;
+ /* idle_balance() stats */
+ u64 max_newidle_lb_cost;
+ unsigned long next_decay_max_lb_cost;
+
#ifdef CONFIG_SCHEDSTATS
/* load_balance() stats */
unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -1029,6 +1043,8 @@ struct task_struct {
struct task_struct *last_wakee;
unsigned long wakee_flips;
unsigned long wakee_flip_decay_ts;
+
+ int wake_cpu;
#endif
int on_rq;
@@ -1324,10 +1340,41 @@ struct task_struct {
#endif
#ifdef CONFIG_NUMA_BALANCING
int numa_scan_seq;
- int numa_migrate_seq;
unsigned int numa_scan_period;
+ unsigned int numa_scan_period_max;
+ int numa_preferred_nid;
+ int numa_migrate_deferred;
+ unsigned long numa_migrate_retry;
u64 node_stamp; /* migration stamp */
struct callback_head numa_work;
+
+ struct list_head numa_entry;
+ struct numa_group *numa_group;
+
+ /*
+ * Exponential decaying average of faults on a per-node basis.
+ * Scheduling placement decisions are made based on the these counts.
+ * The values remain static for the duration of a PTE scan
+ */
+ unsigned long *numa_faults;
+ unsigned long total_numa_faults;
+
+ /*
+ * numa_faults_buffer records faults per node during the current
+ * scan window. When the scan completes, the counts in numa_faults
+ * decay and these values are copied.
+ */
+ unsigned long *numa_faults_buffer;
+
+ /*
+ * numa_faults_locality tracks if faults recorded during the last
+ * scan window were remote/local. The task scan period is adapted
+ * based on the locality of the faults with different weights
+ * depending on whether they were shared or private faults
+ */
+ unsigned long numa_faults_locality[2];
+
+ unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */
struct rcu_head rcu;
@@ -1412,16 +1459,33 @@ struct task_struct {
/* Future-safe accessor for struct task_struct's cpus_allowed. */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
+#define TNF_MIGRATED 0x01
+#define TNF_NO_GROUP 0x02
+#define TNF_SHARED 0x04
+#define TNF_FAULT_LOCAL 0x08
+
#ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, int flags);
+extern pid_t task_numa_group_id(struct task_struct *p);
extern void set_numabalancing_state(bool enabled);
+extern void task_numa_free(struct task_struct *p);
+
+extern unsigned int sysctl_numa_balancing_migrate_deferred;
#else
-static inline void task_numa_fault(int node, int pages, bool migrated)
+static inline void task_numa_fault(int last_node, int node, int pages,
+ int flags)
{
}
+static inline pid_t task_numa_group_id(struct task_struct *p)
+{
+ return 0;
+}
static inline void set_numabalancing_state(bool enabled)
{
}
+static inline void task_numa_free(struct task_struct *p)
+{
+}
#endif
static inline struct pid *task_pid(struct task_struct *task)
@@ -1974,7 +2038,7 @@ extern void wake_up_new_task(struct task_struct *tsk);
#else
static inline void kick_process(struct task_struct *tsk) { }
#endif
-extern void sched_fork(struct task_struct *p);
+extern void sched_fork(unsigned long clone_flags, struct task_struct *p);
extern void sched_dead(struct task_struct *p);
extern void proc_caches_init(void);
@@ -2401,11 +2465,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)
return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}
-static inline int need_resched(void)
-{
- return unlikely(test_thread_flag(TIF_NEED_RESCHED));
-}
-
/*
* cond_resched() and cond_resched_lock(): latency reduction via
* explicit rescheduling in places that are safe. The return
@@ -2474,36 +2533,105 @@ static inline int tsk_is_polling(struct task_struct *p)
{
return task_thread_info(p)->status & TS_POLLING;
}
-static inline void current_set_polling(void)
+static inline void __current_set_polling(void)
{
current_thread_info()->status |= TS_POLLING;
}
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+ __current_set_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_task()
+ */
+ smp_mb();
+
+ return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
{
current_thread_info()->status &= ~TS_POLLING;
- smp_mb__after_clear_bit();
+}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+ __current_clr_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_task()
+ */
+ smp_mb();
+
+ return unlikely(tif_need_resched());
}
#elif defined(TIF_POLLING_NRFLAG)
static inline int tsk_is_polling(struct task_struct *p)
{
return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
}
-static inline void current_set_polling(void)
+
+static inline void __current_set_polling(void)
{
set_thread_flag(TIF_POLLING_NRFLAG);
}
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+ __current_set_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_task()
+ *
+ * XXX: assumes set/clear bit are identical barrier wise.
+ */
+ smp_mb__after_clear_bit();
+
+ return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
{
clear_thread_flag(TIF_POLLING_NRFLAG);
}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+ __current_clr_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_task()
+ */
+ smp_mb__after_clear_bit();
+
+ return unlikely(tif_need_resched());
+}
+
#else
static inline int tsk_is_polling(struct task_struct *p) { return 0; }
-static inline void current_set_polling(void) { }
-static inline void current_clr_polling(void) { }
+static inline void __current_set_polling(void) { }
+static inline void __current_clr_polling(void) { }
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+ return unlikely(tif_need_resched());
+}
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+ return unlikely(tif_need_resched());
+}
#endif
+static __always_inline bool need_resched(void)
+{
+ return unlikely(tif_need_resched());
+}
+
/*
* Thread group CPU time accounting.
*/
@@ -2545,6 +2673,11 @@ static inline unsigned int task_cpu(const struct task_struct *p)
return task_thread_info(p)->cpu;
}
+static inline int task_node(const struct task_struct *p)
+{
+ return cpu_to_node(task_cpu(p));
+}
+
extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
#else
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index bf8086b2506..10d16c4fbe8 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -47,7 +47,6 @@ extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
extern unsigned int sysctl_numa_balancing_scan_delay;
extern unsigned int sysctl_numa_balancing_scan_period_min;
extern unsigned int sysctl_numa_balancing_scan_period_max;
-extern unsigned int sysctl_numa_balancing_scan_period_reset;
extern unsigned int sysctl_numa_balancing_scan_size;
extern unsigned int sysctl_numa_balancing_settle_count;
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 3b5e910d14c..d2abbdb8c6a 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -28,6 +28,7 @@ struct cpu_stop_work {
};
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
+int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg);
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf);
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index e7e04736802..fddbe2023a5 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -104,8 +104,21 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
#define test_thread_flag(flag) \
test_ti_thread_flag(current_thread_info(), flag)
-#define set_need_resched() set_thread_flag(TIF_NEED_RESCHED)
-#define clear_need_resched() clear_thread_flag(TIF_NEED_RESCHED)
+static inline __deprecated void set_need_resched(void)
+{
+ /*
+ * Use of this function in deprecated.
+ *
+ * As of this writing there are only a few users in the DRM tree left
+ * all of which are wrong and can be removed without causing too much
+ * grief.
+ *
+ * The DRM people are aware and are working on removing the last few
+ * instances.
+ */
+}
+
+#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
#if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
/*
diff --git a/include/linux/topology.h b/include/linux/topology.h
index d3cf0d6e771..12ae6ce997d 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -106,6 +106,8 @@ int arch_update_cpu_topology(void);
.last_balance = jiffies, \
.balance_interval = 1, \
.smt_gain = 1178, /* 15% */ \
+ .max_newidle_lb_cost = 0, \
+ .next_decay_max_lb_cost = jiffies, \
}
#endif
#endif /* CONFIG_SCHED_SMT */
@@ -135,6 +137,8 @@ int arch_update_cpu_topology(void);
, \
.last_balance = jiffies, \
.balance_interval = 1, \
+ .max_newidle_lb_cost = 0, \
+ .next_decay_max_lb_cost = jiffies, \
}
#endif
#endif /* CONFIG_SCHED_MC */
@@ -166,6 +170,8 @@ int arch_update_cpu_topology(void);
, \
.last_balance = jiffies, \
.balance_interval = 1, \
+ .max_newidle_lb_cost = 0, \
+ .next_decay_max_lb_cost = jiffies, \
}
#endif
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 2f47989d828..97d660ed70c 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -671,31 +671,17 @@ static inline void tty_wait_until_sent_from_close(struct tty_struct *tty,
#define wait_event_interruptible_tty(tty, wq, condition) \
({ \
int __ret = 0; \
- if (!(condition)) { \
- __wait_event_interruptible_tty(tty, wq, condition, __ret); \
- } \
+ if (!(condition)) \
+ __ret = __wait_event_interruptible_tty(tty, wq, \
+ condition); \
__ret; \
})
-#define __wait_event_interruptible_tty(tty, wq, condition, ret) \
-do { \
- DEFINE_WAIT(__wait); \
- \
- for (;;) { \
- prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
- if (condition) \
- break; \
- if (!signal_pending(current)) { \
- tty_unlock(tty); \
+#define __wait_event_interruptible_tty(tty, wq, condition) \
+ ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
+ tty_unlock(tty); \
schedule(); \
- tty_lock(tty); \
- continue; \
- } \
- ret = -ERESTARTSYS; \
- break; \
- } \
- finish_wait(&wq, &__wait); \
-} while (0)
+ tty_lock(tty))
#ifdef CONFIG_PROC_FS
extern void proc_tty_register_driver(struct tty_driver *);
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 5ca0951e185..9d8cf056e66 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -15,7 +15,7 @@
*/
static inline void pagefault_disable(void)
{
- inc_preempt_count();
+ preempt_count_inc();
/*
* make sure to have issued the store before a pagefault
* can hit.
@@ -30,11 +30,7 @@ static inline void pagefault_enable(void)
* the pagefault handler again.
*/
barrier();
- dec_preempt_count();
- /*
- * make sure we do..
- */
- barrier();
+ preempt_count_dec();
preempt_check_resched();
}
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a67fc163559..61939ba30aa 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -1,7 +1,8 @@
#ifndef _LINUX_WAIT_H
#define _LINUX_WAIT_H
-
-
+/*
+ * Linux wait queue related types and methods
+ */
#include <linux/list.h>
#include <linux/stddef.h>
#include <linux/spinlock.h>
@@ -13,27 +14,27 @@ typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, v
int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
struct __wait_queue {
- unsigned int flags;
+ unsigned int flags;
#define WQ_FLAG_EXCLUSIVE 0x01
- void *private;
- wait_queue_func_t func;
- struct list_head task_list;
+ void *private;
+ wait_queue_func_t func;
+ struct list_head task_list;
};
struct wait_bit_key {
- void *flags;
- int bit_nr;
-#define WAIT_ATOMIC_T_BIT_NR -1
+ void *flags;
+ int bit_nr;
+#define WAIT_ATOMIC_T_BIT_NR -1
};
struct wait_bit_queue {
- struct wait_bit_key key;
- wait_queue_t wait;
+ struct wait_bit_key key;
+ wait_queue_t wait;
};
struct __wait_queue_head {
- spinlock_t lock;
- struct list_head task_list;
+ spinlock_t lock;
+ struct list_head task_list;
};
typedef struct __wait_queue_head wait_queue_head_t;
@@ -84,17 +85,17 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct
static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
{
- q->flags = 0;
- q->private = p;
- q->func = default_wake_function;
+ q->flags = 0;
+ q->private = p;
+ q->func = default_wake_function;
}
-static inline void init_waitqueue_func_entry(wait_queue_t *q,
- wait_queue_func_t func)
+static inline void
+init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
{
- q->flags = 0;
- q->private = NULL;
- q->func = func;
+ q->flags = 0;
+ q->private = NULL;
+ q->func = func;
}
static inline int waitqueue_active(wait_queue_head_t *q)
@@ -114,8 +115,8 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
/*
* Used for wake-one threads:
*/
-static inline void __add_wait_queue_exclusive(wait_queue_head_t *q,
- wait_queue_t *wait)
+static inline void
+__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
wait->flags |= WQ_FLAG_EXCLUSIVE;
__add_wait_queue(q, wait);
@@ -127,23 +128,22 @@ static inline void __add_wait_queue_tail(wait_queue_head_t *head,
list_add_tail(&new->task_list, &head->task_list);
}
-static inline void __add_wait_queue_tail_exclusive(wait_queue_head_t *q,
- wait_queue_t *wait)
+static inline void
+__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
wait->flags |= WQ_FLAG_EXCLUSIVE;
__add_wait_queue_tail(q, wait);
}
-static inline void __remove_wait_queue(wait_queue_head_t *head,
- wait_queue_t *old)
+static inline void
+__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
{
list_del(&old->task_list);
}
void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
- void *key);
+void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
void __wake_up_bit(wait_queue_head_t *, void *, int);
@@ -170,27 +170,64 @@ wait_queue_head_t *bit_waitqueue(void *, int);
/*
* Wakeup macros to be used to report events to the targets.
*/
-#define wake_up_poll(x, m) \
+#define wake_up_poll(x, m) \
__wake_up(x, TASK_NORMAL, 1, (void *) (m))
-#define wake_up_locked_poll(x, m) \
+#define wake_up_locked_poll(x, m) \
__wake_up_locked_key((x), TASK_NORMAL, (void *) (m))
-#define wake_up_interruptible_poll(x, m) \
+#define wake_up_interruptible_poll(x, m) \
__wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m))
#define wake_up_interruptible_sync_poll(x, m) \
__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
-#define __wait_event(wq, condition) \
-do { \
- DEFINE_WAIT(__wait); \
+#define ___wait_cond_timeout(condition) \
+({ \
+ bool __cond = (condition); \
+ if (__cond && !__ret) \
+ __ret = 1; \
+ __cond || !__ret; \
+})
+
+#define ___wait_is_interruptible(state) \
+ (!__builtin_constant_p(state) || \
+ state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \
+
+#define ___wait_event(wq, condition, state, exclusive, ret, cmd) \
+({ \
+ __label__ __out; \
+ wait_queue_t __wait; \
+ long __ret = ret; \
+ \
+ INIT_LIST_HEAD(&__wait.task_list); \
+ if (exclusive) \
+ __wait.flags = WQ_FLAG_EXCLUSIVE; \
+ else \
+ __wait.flags = 0; \
\
for (;;) { \
- prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
+ long __int = prepare_to_wait_event(&wq, &__wait, state);\
+ \
if (condition) \
break; \
- schedule(); \
+ \
+ if (___wait_is_interruptible(state) && __int) { \
+ __ret = __int; \
+ if (exclusive) { \
+ abort_exclusive_wait(&wq, &__wait, \
+ state, NULL); \
+ goto __out; \
+ } \
+ break; \
+ } \
+ \
+ cmd; \
} \
finish_wait(&wq, &__wait); \
-} while (0)
+__out: __ret; \
+})
+
+#define __wait_event(wq, condition) \
+ (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
+ schedule())
/**
* wait_event - sleep until a condition gets true
@@ -204,29 +241,17 @@ do { \
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*/
-#define wait_event(wq, condition) \
+#define wait_event(wq, condition) \
do { \
- if (condition) \
+ if (condition) \
break; \
__wait_event(wq, condition); \
} while (0)
-#define __wait_event_timeout(wq, condition, ret) \
-do { \
- DEFINE_WAIT(__wait); \
- \
- for (;;) { \
- prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
- if (condition) \
- break; \
- ret = schedule_timeout(ret); \
- if (!ret) \
- break; \
- } \
- if (!ret && (condition)) \
- ret = 1; \
- finish_wait(&wq, &__wait); \
-} while (0)
+#define __wait_event_timeout(wq, condition, timeout) \
+ ___wait_event(wq, ___wait_cond_timeout(condition), \
+ TASK_UNINTERRUPTIBLE, 0, timeout, \
+ __ret = schedule_timeout(__ret))
/**
* wait_event_timeout - sleep until a condition gets true or a timeout elapses
@@ -248,28 +273,14 @@ do { \
#define wait_event_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
- if (!(condition)) \
- __wait_event_timeout(wq, condition, __ret); \
+ if (!___wait_cond_timeout(condition)) \
+ __ret = __wait_event_timeout(wq, condition, timeout); \
__ret; \
})
-#define __wait_event_interruptible(wq, condition, ret) \
-do { \
- DEFINE_WAIT(__wait); \
- \
- for (;;) { \
- prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
- if (condition) \
- break; \
- if (!signal_pending(current)) { \
- schedule(); \
- continue; \
- } \
- ret = -ERESTARTSYS; \
- break; \
- } \
- finish_wait(&wq, &__wait); \
-} while (0)
+#define __wait_event_interruptible(wq, condition) \
+ ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
+ schedule())
/**
* wait_event_interruptible - sleep until a condition gets true
@@ -290,31 +301,14 @@ do { \
({ \
int __ret = 0; \
if (!(condition)) \
- __wait_event_interruptible(wq, condition, __ret); \
+ __ret = __wait_event_interruptible(wq, condition); \
__ret; \
})
-#define __wait_event_interruptible_timeout(wq, condition, ret) \
-do { \
- DEFINE_WAIT(__wait); \
- \
- for (;;) { \
- prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
- if (condition) \
- break; \
- if (!signal_pending(current)) { \
- ret = schedule_timeout(ret); \
- if (!ret) \
- break; \
- continue; \
- } \
- ret = -ERESTARTSYS; \
- break; \
- } \
- if (!ret && (condition)) \
- ret = 1; \
- finish_wait(&wq, &__wait); \
-} while (0)
+#define __wait_event_interruptible_timeout(wq, condition, timeout) \
+ ___wait_event(wq, ___wait_cond_timeout(condition), \
+ TASK_INTERRUPTIBLE, 0, timeout, \
+ __ret = schedule_timeout(__ret))
/**
* wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
@@ -337,15 +331,15 @@ do { \
#define wait_event_interruptible_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
- if (!(condition)) \
- __wait_event_interruptible_timeout(wq, condition, __ret); \
+ if (!___wait_cond_timeout(condition)) \
+ __ret = __wait_event_interruptible_timeout(wq, \
+ condition, timeout); \
__ret; \
})
#define __wait_event_hrtimeout(wq, condition, timeout, state) \
({ \
int __ret = 0; \
- DEFINE_WAIT(__wait); \
struct hrtimer_sleeper __t; \
\
hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, \
@@ -356,25 +350,15 @@ do { \
current->timer_slack_ns, \
HRTIMER_MODE_REL); \
\
- for (;;) { \
- prepare_to_wait(&wq, &__wait, state); \
- if (condition) \
- break; \
- if (state == TASK_INTERRUPTIBLE && \
- signal_pending(current)) { \
- __ret = -ERESTARTSYS; \
- break; \
- } \
+ __ret = ___wait_event(wq, condition, state, 0, 0, \
if (!__t.task) { \
__ret = -ETIME; \
break; \
} \
- schedule(); \
- } \
+ schedule()); \
\
hrtimer_cancel(&__t.timer); \
destroy_hrtimer_on_stack(&__t.timer); \
- finish_wait(&wq, &__wait); \
__ret; \
})
@@ -428,33 +412,15 @@ do { \
__ret; \
})
-#define __wait_event_interruptible_exclusive(wq, condition, ret) \
-do { \
- DEFINE_WAIT(__wait); \
- \
- for (;;) { \
- prepare_to_wait_exclusive(&wq, &__wait, \
- TASK_INTERRUPTIBLE); \
- if (condition) { \
- finish_wait(&wq, &__wait); \
- break; \
- } \
- if (!signal_pending(current)) { \
- schedule(); \
- continue; \
- } \
- ret = -ERESTARTSYS; \
- abort_exclusive_wait(&wq, &__wait, \
- TASK_INTERRUPTIBLE, NULL); \
- break; \
- } \
-} while (0)
+#define __wait_event_interruptible_exclusive(wq, condition) \
+ ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \
+ schedule())
#define wait_event_interruptible_exclusive(wq, condition) \
({ \
int __ret = 0; \
if (!(condition)) \
- __wait_event_interruptible_exclusive(wq, condition, __ret);\
+ __ret = __wait_event_interruptible_exclusive(wq, condition);\
__ret; \
})
@@ -606,24 +572,8 @@ do { \
? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1))
-
-#define __wait_event_killable(wq, condition, ret) \
-do { \
- DEFINE_WAIT(__wait); \
- \
- for (;;) { \
- prepare_to_wait(&wq, &__wait, TASK_KILLABLE); \
- if (condition) \
- break; \
- if (!fatal_signal_pending(current)) { \
- schedule(); \
- continue; \
- } \
- ret = -ERESTARTSYS; \
- break; \
- } \
- finish_wait(&wq, &__wait); \
-} while (0)
+#define __wait_event_killable(wq, condition) \
+ ___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule())
/**
* wait_event_killable - sleep until a condition gets true
@@ -644,26 +594,17 @@ do { \
({ \
int __ret = 0; \
if (!(condition)) \
- __wait_event_killable(wq, condition, __ret); \
+ __ret = __wait_event_killable(wq, condition); \
__ret; \
})
#define __wait_event_lock_irq(wq, condition, lock, cmd) \
-do { \
- DEFINE_WAIT(__wait); \
- \
- for (;;) { \
- prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
- if (condition) \
- break; \
- spin_unlock_irq(&lock); \
- cmd; \
- schedule(); \
- spin_lock_irq(&lock); \
- } \
- finish_wait(&wq, &__wait); \
-} while (0)
+ (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
+ spin_unlock_irq(&lock); \
+ cmd; \
+ schedule(); \
+ spin_lock_irq(&lock))
/**
* wait_event_lock_irq_cmd - sleep until a condition gets true. The
@@ -723,26 +664,12 @@ do { \
} while (0)
-#define __wait_event_interruptible_lock_irq(wq, condition, \
- lock, ret, cmd) \
-do { \
- DEFINE_WAIT(__wait); \
- \
- for (;;) { \
- prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
- if (condition) \
- break; \
- if (signal_pending(current)) { \
- ret = -ERESTARTSYS; \
- break; \
- } \
- spin_unlock_irq(&lock); \
- cmd; \
- schedule(); \
- spin_lock_irq(&lock); \
- } \
- finish_wait(&wq, &__wait); \
-} while (0)
+#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd) \
+ ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
+ spin_unlock_irq(&lock); \
+ cmd; \
+ schedule(); \
+ spin_lock_irq(&lock))
/**
* wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
@@ -772,10 +699,9 @@ do { \
#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \
({ \
int __ret = 0; \
- \
if (!(condition)) \
- __wait_event_interruptible_lock_irq(wq, condition, \
- lock, __ret, cmd); \
+ __ret = __wait_event_interruptible_lock_irq(wq, \
+ condition, lock, cmd); \
__ret; \
})
@@ -804,39 +730,24 @@ do { \
#define wait_event_interruptible_lock_irq(wq, condition, lock) \
({ \
int __ret = 0; \
- \
if (!(condition)) \
- __wait_event_interruptible_lock_irq(wq, condition, \
- lock, __ret, ); \
+ __ret = __wait_event_interruptible_lock_irq(wq, \
+ condition, lock,); \
__ret; \
})
#define __wait_event_interruptible_lock_irq_timeout(wq, condition, \
- lock, ret) \
-do { \
- DEFINE_WAIT(__wait); \
- \
- for (;;) { \
- prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
- if (condition) \
- break; \
- if (signal_pending(current)) { \
- ret = -ERESTARTSYS; \
- break; \
- } \
- spin_unlock_irq(&lock); \
- ret = schedule_timeout(ret); \
- spin_lock_irq(&lock); \
- if (!ret) \
- break; \
- } \
- finish_wait(&wq, &__wait); \
-} while (0)
+ lock, timeout) \
+ ___wait_event(wq, ___wait_cond_timeout(condition), \
+ TASK_INTERRUPTIBLE, 0, timeout, \
+ spin_unlock_irq(&lock); \
+ __ret = schedule_timeout(__ret); \
+ spin_lock_irq(&lock));
/**
- * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets true or a timeout elapses.
- * The condition is checked under the lock. This is expected
- * to be called with the lock taken.
+ * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets
+ * true or a timeout elapses. The condition is checked under
+ * the lock. This is expected to be called with the lock taken.
* @wq: the waitqueue to wait on
* @condition: a C expression for the event to wait for
* @lock: a locked spinlock_t, which will be released before schedule()
@@ -860,11 +771,10 @@ do { \
#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock, \
timeout) \
({ \
- int __ret = timeout; \
- \
- if (!(condition)) \
- __wait_event_interruptible_lock_irq_timeout( \
- wq, condition, lock, __ret); \
+ long __ret = timeout; \
+ if (!___wait_cond_timeout(condition)) \
+ __ret = __wait_event_interruptible_lock_irq_timeout( \
+ wq, condition, lock, timeout); \
__ret; \
})
@@ -875,20 +785,18 @@ do { \
* We plan to remove these interfaces.
*/
extern void sleep_on(wait_queue_head_t *q);
-extern long sleep_on_timeout(wait_queue_head_t *q,
- signed long timeout);
+extern long sleep_on_timeout(wait_queue_head_t *q, signed long timeout);
extern void interruptible_sleep_on(wait_queue_head_t *q);
-extern long interruptible_sleep_on_timeout(wait_queue_head_t *q,
- signed long timeout);
+extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, signed long timeout);
/*
* Waitqueues which are removed from the waitqueue_head at wakeup time
*/
void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
+long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
-void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
- unsigned int mode, void *key);
+void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
@@ -934,8 +842,8 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
* One uses wait_on_bit() where one is waiting for the bit to clear,
* but has no intention of setting it.
*/
-static inline int wait_on_bit(void *word, int bit,
- int (*action)(void *), unsigned mode)
+static inline int
+wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode)
{
if (!test_bit(bit, word))
return 0;
@@ -958,8 +866,8 @@ static inline int wait_on_bit(void *word, int bit,
* One uses wait_on_bit_lock() where one is waiting for the bit to
* clear with the intention of setting it, and when done, clearing it.
*/
-static inline int wait_on_bit_lock(void *word, int bit,
- int (*action)(void *), unsigned mode)
+static inline int
+wait_on_bit_lock(void *word, int bit, int (*action)(void *), unsigned mode)
{
if (!test_and_set_bit(bit, word))
return 0;
@@ -983,5 +891,5 @@ int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
return 0;
return out_of_line_wait_on_atomic_t(val, action, mode);
}
-
-#endif
+
+#endif /* _LINUX_WAIT_H */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 2e7d9947a10..613381bcde4 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -100,7 +100,7 @@ static inline long __trace_sched_switch_state(struct task_struct *p)
/*
* For all intents and purposes a preempted task is a running task.
*/
- if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
+ if (task_preempt_count(p) & PREEMPT_ACTIVE)
state = TASK_RUNNING | TASK_STATE_MAX;
#endif