From 4e857c58efeb99393cba5a5d0d8ec7117183137c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 17 Mar 2014 18:06:10 +0100 Subject: arch: Mass conversion of smp_mb__*() Mostly scripted conversion of the smp_mb__* barriers. Signed-off-by: Peter Zijlstra Acked-by: Paul E. McKenney Link: http://lkml.kernel.org/n/tip-55dhyhocezdw1dg7u19hmh1u@git.kernel.org Cc: Linus Torvalds Cc: linux-arch@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 25f54c79f75..010cde3b44c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2782,10 +2782,8 @@ static inline bool __must_check current_set_polling_and_test(void) /* * Polling state must be visible before we test NEED_RESCHED, * paired by resched_task() - * - * XXX: assumes set/clear bit are identical barrier wise. */ - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); return unlikely(tif_need_resched()); } @@ -2803,7 +2801,7 @@ static inline bool __must_check current_clr_polling_and_test(void) /* * Polling state must be visible before we test NEED_RESCHED, * paired by resched_task() */ - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); return unlikely(tif_need_resched()); } -- cgit v1.2.3-70-g09d2 From 5bfd126e80dca70431aef8fdbc1cf14535f3c338 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 15 Apr 2014 13:49:04 +0200 Subject: sched/deadline: Fix sched_yield() behavior yield_task_dl() is broken: o it forces current to be throttled, setting its runtime to zero; o it sets current's dl_se->dl_new to one, expecting that dl_task_timer() will queue it back with proper parameters at replenish time. Unfortunately, dl_task_timer() has this check at the very beginning: if (!dl_task(p) || dl_se->dl_new) goto unlock; So, it just bails out and the task is never replenished; in effect, it has yielded forever. To fix this, introduce a new flag indicating that the task properly yielded the CPU before its current runtime expired. While this is a bit of overkill at the moment, the flag would be useful in the future to discriminate between "good" jobs (whose remaining runtime could be reclaimed, i.e. recycled) and "bad" jobs (for which dl_throttled has been set) that need to be stopped. Reported-by: yjay.kim Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140429103953.e68eba1b2ac3309214e3dc5a@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 +++++-- kernel/sched/core.c | 1 + kernel/sched/deadline.c | 5 +++-- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 25f54c79f75..2a4298fb0d8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1153,9 +1153,12 @@ struct sched_dl_entity { * * @dl_boosted tells if we are boosted due to DI. If so we are * outside bandwidth enforcement mechanism (but only until we - * exit the critical section). + * exit the critical section); + * + * @dl_yielded tells if task gave up the cpu before consuming + * all its available runtime during the last job. */ - int dl_throttled, dl_new, dl_boosted; + int dl_throttled, dl_new, dl_boosted, dl_yielded; /* * Bandwidth enforcement timer.
Each -deadline task has its diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9fe2190005c..e62c65a12d5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3124,6 +3124,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr) dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); dl_se->dl_throttled = 0; dl_se->dl_new = 1; + dl_se->dl_yielded = 0; } static void __setscheduler_params(struct task_struct *p, diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index b08095786cb..800e99b9907 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -528,6 +528,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) sched_clock_tick(); update_rq_clock(rq); dl_se->dl_throttled = 0; + dl_se->dl_yielded = 0; if (p->on_rq) { enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); if (task_has_dl_policy(rq->curr)) @@ -893,10 +894,10 @@ static void yield_task_dl(struct rq *rq) * We make the task go to sleep until its current deadline by * forcing its runtime to zero. This way, update_curr_dl() stops * it and the bandwidth timer will wake it up and will give it - * new scheduling parameters (thanks to dl_new=1). + * new scheduling parameters (thanks to dl_yielded=1). */ if (p->dl.runtime > 0) { - rq->curr->dl.dl_new = 1; + rq->curr->dl.dl_yielded = 1; p->dl.runtime = 0; } update_curr_dl(rq); -- cgit v1.2.3-70-g09d2 From 143e1e28cb40bed836b0a06567208bd7347c9672 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Fri, 11 Apr 2014 11:44:37 +0200 Subject: sched: Rework sched_domain topology definition We replace the old way to configure the scheduler topology with a new method which enables a platform to declare additional levels (if needed). We still have a default topology table definition that can be used by platforms that don't want more levels than the SMT, MC, CPU and NUMA ones. This table can be overridden by an arch which either wants to add a new level where load balancing makes sense, like a BOOK or power-gating level, or wants to change the flags configuration of some levels. For each level, we need a function pointer that returns the cpumask for each cpu, a function pointer that returns the flags for the level, and a name. Only flags that describe the topology can be set by an architecture. The current topology flags are: SD_SHARE_CPUPOWER SD_SHARE_PKG_RESOURCES SD_NUMA SD_ASYM_PACKING Then, each level must be a subset of the next one. The build sequence of the sched_domain will take care of removing useless levels, like those with only 1 CPU and those with the same CPU span and no more relevant information for load balancing than their children. Signed-off-by: Vincent Guittot Tested-by: Dietmar Eggemann Reviewed-by: Preeti U Murthy Reviewed-by: Dietmar Eggemann Signed-off-by: Peter Zijlstra Cc: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Bjorn Helgaas Cc: Chris Metcalf Cc: Christoph Lameter Cc: David S.
Miller Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: Hanjun Guo Cc: Heiko Carstens Cc: Jason Low Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Tony Luck Cc: linux390@de.ibm.com Cc: linux-ia64@vger.kernel.org Cc: linux-s390@vger.kernel.org Link: http://lkml.kernel.org/r/1397209481-28542-2-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 24 ---- arch/s390/include/asm/topology.h | 2 - arch/tile/include/asm/topology.h | 33 ------ include/linux/sched.h | 53 +++++++++ include/linux/topology.h | 128 +++------------------ kernel/sched/core.c | 233 ++++++++++++++++++++------------------- 6 files changed, 186 insertions(+), 287 deletions(-) (limited to 'include/linux/sched.h') diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 5cb55a1e606..3202aa74e0d 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -46,30 +46,6 @@ void build_cpu_to_node_map(void); -#define SD_CPU_INIT (struct sched_domain) { \ - .parent = NULL, \ - .child = NULL, \ - .groups = NULL, \ - .min_interval = 1, \ - .max_interval = 4, \ - .busy_factor = 64, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 2, \ - .busy_idx = 2, \ - .idle_idx = 1, \ - .newidle_idx = 0, \ - .wake_idx = 0, \ - .forkexec_idx = 0, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_EXEC \ - | SD_BALANCE_FORK \ - | SD_WAKE_AFFINE, \ - .last_balance = jiffies, \ - .balance_interval = 1, \ - .nr_balance_failed = 0, \ -} - #endif /* CONFIG_NUMA */ #ifdef CONFIG_SMP diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 05425b18c0a..07763bdb408 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -64,8 +64,6 @@ static inline void s390_init_cpu_topology(void) }; #endif -#define SD_BOOK_INIT SD_CPU_INIT - #include #endif /* _ASM_S390_TOPOLOGY_H */ diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h index d15c0d8d550..93831184423 100644 --- a/arch/tile/include/asm/topology.h +++ b/arch/tile/include/asm/topology.h @@ -44,39 +44,6 @@ static inline const struct cpumask *cpumask_of_node(int node) /* For now, use numa node -1 for global allocation. */ #define pcibus_to_node(bus) ((void)(bus), -1) -/* - * TILE architecture has many cores integrated in one processor, so we need - * setup bigger balance_interval for both CPU/NODE scheduling domains to - * reduce process scheduling costs. - */ - -/* sched_domains SD_CPU_INIT for TILE architecture */ -#define SD_CPU_INIT (struct sched_domain) { \ - .min_interval = 4, \ - .max_interval = 128, \ - .busy_factor = 64, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 1, \ - .busy_idx = 2, \ - .idle_idx = 1, \ - .newidle_idx = 0, \ - .wake_idx = 0, \ - .forkexec_idx = 0, \ - \ - .flags = 1*SD_LOAD_BALANCE \ - | 1*SD_BALANCE_NEWIDLE \ - | 1*SD_BALANCE_EXEC \ - | 1*SD_BALANCE_FORK \ - | 0*SD_BALANCE_WAKE \ - | 0*SD_WAKE_AFFINE \ - | 0*SD_SHARE_CPUPOWER \ - | 0*SD_SHARE_PKG_RESOURCES \ - | 0*SD_SERIALIZE \ - , \ - .last_balance = jiffies, \ - .balance_interval = 32, \ -} - /* By definition, we create nodes based on online memory. 
*/ #define node_has_online_mem(nid) 1 diff --git a/include/linux/sched.h b/include/linux/sched.h index 2a4298fb0d8..656b035c30e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -879,6 +879,27 @@ enum cpu_idle_type { extern int __weak arch_sd_sibiling_asym_packing(void); +#ifdef CONFIG_SCHED_SMT +static inline const int cpu_smt_flags(void) +{ + return SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES; +} +#endif + +#ifdef CONFIG_SCHED_MC +static inline const int cpu_core_flags(void) +{ + return SD_SHARE_PKG_RESOURCES; +} +#endif + +#ifdef CONFIG_NUMA +static inline const int cpu_numa_flags(void) +{ + return SD_NUMA; +} +#endif + struct sched_domain_attr { int relax_domain_level; }; @@ -985,6 +1006,38 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); bool cpus_share_cache(int this_cpu, int that_cpu); +typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); +typedef const int (*sched_domain_flags_f)(void); + +#define SDTL_OVERLAP 0x01 + +struct sd_data { + struct sched_domain **__percpu sd; + struct sched_group **__percpu sg; + struct sched_group_power **__percpu sgp; +}; + +struct sched_domain_topology_level { + sched_domain_mask_f mask; + sched_domain_flags_f sd_flags; + int flags; + int numa_level; + struct sd_data data; +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif +}; + +extern struct sched_domain_topology_level *sched_domain_topology; + +extern void set_sched_topology(struct sched_domain_topology_level *tl); + +#ifdef CONFIG_SCHED_DEBUG +# define SD_INIT_NAME(type) .name = #type +#else +# define SD_INIT_NAME(type) +#endif + #else /* CONFIG_SMP */ struct sched_domain_attr; diff --git a/include/linux/topology.h b/include/linux/topology.h index 7062330a132..973671ff9e7 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -66,121 +66,6 @@ int arch_update_cpu_topology(void); #define PENALTY_FOR_NODE_WITH_CPUS (1) #endif -/* - * Below are the 3 major initializers used in building sched_domains: - * SD_SIBLING_INIT, for SMT domains - * SD_CPU_INIT, for SMP domains - * - * Any architecture that cares to do any tuning to these values should do so - * by defining their own arch-specific initializer in include/asm/topology.h. - * A definition there will automagically override these default initializers - * and allow arch-specific performance tuning of sched_domains. - * (Only non-zero and non-null fields need be specified.) - */ - -#ifdef CONFIG_SCHED_SMT -/* MCD - Do we really need this? It is always on if CONFIG_SCHED_SMT is, - * so can't we drop this in favor of CONFIG_SCHED_SMT? - */ -#define ARCH_HAS_SCHED_WAKE_IDLE -/* Common values for SMT siblings */ -#ifndef SD_SIBLING_INIT -#define SD_SIBLING_INIT (struct sched_domain) { \ - .min_interval = 1, \ - .max_interval = 2, \ - .busy_factor = 64, \ - .imbalance_pct = 110, \ - \ - .flags = 1*SD_LOAD_BALANCE \ - | 1*SD_BALANCE_NEWIDLE \ - | 1*SD_BALANCE_EXEC \ - | 1*SD_BALANCE_FORK \ - | 0*SD_BALANCE_WAKE \ - | 1*SD_WAKE_AFFINE \ - | 1*SD_SHARE_CPUPOWER \ - | 1*SD_SHARE_PKG_RESOURCES \ - | 0*SD_SERIALIZE \ - | 0*SD_PREFER_SIBLING \ - | arch_sd_sibling_asym_packing() \ - , \ - .last_balance = jiffies, \ - .balance_interval = 1, \ - .smt_gain = 1178, /* 15% */ \ - .max_newidle_lb_cost = 0, \ - .next_decay_max_lb_cost = jiffies, \ -} -#endif -#endif /* CONFIG_SCHED_SMT */ - -#ifdef CONFIG_SCHED_MC -/* Common values for MC siblings. 
for now mostly derived from SD_CPU_INIT */ -#ifndef SD_MC_INIT -#define SD_MC_INIT (struct sched_domain) { \ - .min_interval = 1, \ - .max_interval = 4, \ - .busy_factor = 64, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 1, \ - .busy_idx = 2, \ - .wake_idx = 0, \ - .forkexec_idx = 0, \ - \ - .flags = 1*SD_LOAD_BALANCE \ - | 1*SD_BALANCE_NEWIDLE \ - | 1*SD_BALANCE_EXEC \ - | 1*SD_BALANCE_FORK \ - | 0*SD_BALANCE_WAKE \ - | 1*SD_WAKE_AFFINE \ - | 0*SD_SHARE_CPUPOWER \ - | 1*SD_SHARE_PKG_RESOURCES \ - | 0*SD_SERIALIZE \ - , \ - .last_balance = jiffies, \ - .balance_interval = 1, \ - .max_newidle_lb_cost = 0, \ - .next_decay_max_lb_cost = jiffies, \ -} -#endif -#endif /* CONFIG_SCHED_MC */ - -/* Common values for CPUs */ -#ifndef SD_CPU_INIT -#define SD_CPU_INIT (struct sched_domain) { \ - .min_interval = 1, \ - .max_interval = 4, \ - .busy_factor = 64, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 1, \ - .busy_idx = 2, \ - .idle_idx = 1, \ - .newidle_idx = 0, \ - .wake_idx = 0, \ - .forkexec_idx = 0, \ - \ - .flags = 1*SD_LOAD_BALANCE \ - | 1*SD_BALANCE_NEWIDLE \ - | 1*SD_BALANCE_EXEC \ - | 1*SD_BALANCE_FORK \ - | 0*SD_BALANCE_WAKE \ - | 1*SD_WAKE_AFFINE \ - | 0*SD_SHARE_CPUPOWER \ - | 0*SD_SHARE_PKG_RESOURCES \ - | 0*SD_SERIALIZE \ - | 1*SD_PREFER_SIBLING \ - , \ - .last_balance = jiffies, \ - .balance_interval = 1, \ - .max_newidle_lb_cost = 0, \ - .next_decay_max_lb_cost = jiffies, \ -} -#endif - -#ifdef CONFIG_SCHED_BOOK -#ifndef SD_BOOK_INIT -#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!! -#endif -#endif /* CONFIG_SCHED_BOOK */ - #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DECLARE_PER_CPU(int, numa_node); @@ -295,4 +180,17 @@ static inline int cpu_to_mem(int cpu) #define topology_core_cpumask(cpu) cpumask_of(cpu) #endif +#ifdef CONFIG_SCHED_SMT +static inline const struct cpumask *cpu_smt_mask(int cpu) +{ + return topology_thread_cpumask(cpu); +} +#endif + +static inline const struct cpumask *cpu_cpu_mask(int cpu) +{ + return cpumask_of_node(cpu_to_node(cpu)); +} + + #endif /* _LINUX_TOPOLOGY_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 13584f1cccf..7d332b7899c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5566,17 +5566,6 @@ static int __init isolated_cpu_setup(char *str) __setup("isolcpus=", isolated_cpu_setup); -static const struct cpumask *cpu_cpu_mask(int cpu) -{ - return cpumask_of_node(cpu_to_node(cpu)); -} - -struct sd_data { - struct sched_domain **__percpu sd; - struct sched_group **__percpu sg; - struct sched_group_power **__percpu sgp; -}; - struct s_data { struct sched_domain ** __percpu sd; struct root_domain *rd; @@ -5589,21 +5578,6 @@ enum s_alloc { sa_none, }; -struct sched_domain_topology_level; - -typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu); -typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); - -#define SDTL_OVERLAP 0x01 - -struct sched_domain_topology_level { - sched_domain_init_f init; - sched_domain_mask_f mask; - int flags; - int numa_level; - struct sd_data data; -}; - /* * Build an iteration mask that can exclude certain CPUs from the upwards * domain traversal. 
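(Aside: under this rework, an arch override boils down to a table plus one call to set_sched_topology(); the following minimal sketch is modeled on the powerpc conversion later in this series, with my_smt_flags and my_topology as illustrative placeholder names, not taken from any patch here.)

#ifdef CONFIG_SCHED_SMT
static const int my_smt_flags(void)
{
	/* Only flags in TOPOLOGY_SD_FLAGS may be returned here. */
	return SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
}
#endif

static struct sched_domain_topology_level my_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, my_smt_flags, SD_INIT_NAME(SMT) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

/* Called once during arch SMP bringup, e.g. from smp_cpus_done(): */
set_sched_topology(my_topology);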
@@ -5832,34 +5806,6 @@ int __weak arch_sd_sibling_asym_packing(void) * Non-inlined to reduce accumulated stack pressure in build_sched_domains() */ -#ifdef CONFIG_SCHED_DEBUG -# define SD_INIT_NAME(sd, type) sd->name = #type -#else -# define SD_INIT_NAME(sd, type) do { } while (0) -#endif - -#define SD_INIT_FUNC(type) \ -static noinline struct sched_domain * \ -sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \ -{ \ - struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); \ - *sd = SD_##type##_INIT; \ - SD_INIT_NAME(sd, type); \ - sd->private = &tl->data; \ - return sd; \ -} - -SD_INIT_FUNC(CPU) -#ifdef CONFIG_SCHED_SMT - SD_INIT_FUNC(SIBLING) -#endif -#ifdef CONFIG_SCHED_MC - SD_INIT_FUNC(MC) -#endif -#ifdef CONFIG_SCHED_BOOK - SD_INIT_FUNC(BOOK) -#endif - static int default_relax_domain_level = -1; int sched_domain_level_max; @@ -5947,99 +5893,156 @@ static void claim_allocations(int cpu, struct sched_domain *sd) *per_cpu_ptr(sdd->sgp, cpu) = NULL; } -#ifdef CONFIG_SCHED_SMT -static const struct cpumask *cpu_smt_mask(int cpu) -{ - return topology_thread_cpumask(cpu); -} -#endif - -/* - * Topology list, bottom-up. - */ -static struct sched_domain_topology_level default_topology[] = { -#ifdef CONFIG_SCHED_SMT - { sd_init_SIBLING, cpu_smt_mask, }, -#endif -#ifdef CONFIG_SCHED_MC - { sd_init_MC, cpu_coregroup_mask, }, -#endif -#ifdef CONFIG_SCHED_BOOK - { sd_init_BOOK, cpu_book_mask, }, -#endif - { sd_init_CPU, cpu_cpu_mask, }, - { NULL, }, -}; - -static struct sched_domain_topology_level *sched_domain_topology = default_topology; - -#define for_each_sd_topology(tl) \ - for (tl = sched_domain_topology; tl->init; tl++) - #ifdef CONFIG_NUMA - static int sched_domains_numa_levels; static int *sched_domains_numa_distance; static struct cpumask ***sched_domains_numa_masks; static int sched_domains_curr_level; +#endif -static inline int sd_local_flags(int level) -{ - if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE) - return 0; - - return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE; -} +/* + * SD_flags allowed in topology descriptions. + * + * SD_SHARE_CPUPOWER - describes SMT topologies + * SD_SHARE_PKG_RESOURCES - describes shared caches + * SD_NUMA - describes NUMA topologies + * + * Odd one out: + * SD_ASYM_PACKING - describes SMT quirks + */ +#define TOPOLOGY_SD_FLAGS \ + (SD_SHARE_CPUPOWER | \ + SD_SHARE_PKG_RESOURCES | \ + SD_NUMA | \ + SD_ASYM_PACKING) static struct sched_domain * -sd_numa_init(struct sched_domain_topology_level *tl, int cpu) +sd_init(struct sched_domain_topology_level *tl, int cpu) { struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); - int level = tl->numa_level; - int sd_weight = cpumask_weight( - sched_domains_numa_masks[level][cpu_to_node(cpu)]); + int sd_weight, sd_flags = 0; + +#ifdef CONFIG_NUMA + /* + * Ugly hack to pass state to sd_numa_mask()... 
+ */ + sched_domains_curr_level = tl->numa_level; +#endif + + sd_weight = cpumask_weight(tl->mask(cpu)); + + if (tl->sd_flags) + sd_flags = (*tl->sd_flags)(); + if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS, + "wrong sd_flags in topology description\n")) + sd_flags &= ~TOPOLOGY_SD_FLAGS; *sd = (struct sched_domain){ .min_interval = sd_weight, .max_interval = 2*sd_weight, .busy_factor = 32, .imbalance_pct = 125, - .cache_nice_tries = 2, - .busy_idx = 3, - .idle_idx = 2, + + .cache_nice_tries = 0, + .busy_idx = 0, + .idle_idx = 0, .newidle_idx = 0, .wake_idx = 0, .forkexec_idx = 0, .flags = 1*SD_LOAD_BALANCE | 1*SD_BALANCE_NEWIDLE - | 0*SD_BALANCE_EXEC - | 0*SD_BALANCE_FORK + | 1*SD_BALANCE_EXEC + | 1*SD_BALANCE_FORK | 0*SD_BALANCE_WAKE - | 0*SD_WAKE_AFFINE + | 1*SD_WAKE_AFFINE | 0*SD_SHARE_CPUPOWER | 0*SD_SHARE_PKG_RESOURCES - | 1*SD_SERIALIZE + | 0*SD_SERIALIZE | 0*SD_PREFER_SIBLING - | 1*SD_NUMA - | sd_local_flags(level) + | 0*SD_NUMA + | sd_flags , + .last_balance = jiffies, .balance_interval = sd_weight, + .smt_gain = 0, .max_newidle_lb_cost = 0, .next_decay_max_lb_cost = jiffies, +#ifdef CONFIG_SCHED_DEBUG + .name = tl->name, +#endif }; - SD_INIT_NAME(sd, NUMA); - sd->private = &tl->data; /* - * Ugly hack to pass state to sd_numa_mask()... + * Convert topological properties into behaviour. */ - sched_domains_curr_level = tl->numa_level; + + if (sd->flags & SD_SHARE_CPUPOWER) { + sd->imbalance_pct = 110; + sd->smt_gain = 1178; /* ~15% */ + sd->flags |= arch_sd_sibling_asym_packing(); + + } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { + sd->imbalance_pct = 117; + sd->cache_nice_tries = 1; + sd->busy_idx = 2; + +#ifdef CONFIG_NUMA + } else if (sd->flags & SD_NUMA) { + sd->cache_nice_tries = 2; + sd->busy_idx = 3; + sd->idle_idx = 2; + + sd->flags |= SD_SERIALIZE; + if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) { + sd->flags &= ~(SD_BALANCE_EXEC | + SD_BALANCE_FORK | + SD_WAKE_AFFINE); + } + +#endif + } else { + sd->flags |= SD_PREFER_SIBLING; + sd->cache_nice_tries = 1; + sd->busy_idx = 2; + sd->idle_idx = 1; + } + + sd->private = &tl->data; return sd; } +/* + * Topology list, bottom-up. + */ +static struct sched_domain_topology_level default_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, +#endif +#ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, +#endif +#ifdef CONFIG_SCHED_BOOK + { cpu_book_mask, SD_INIT_NAME(BOOK) }, +#endif + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + +struct sched_domain_topology_level *sched_domain_topology = default_topology; + +#define for_each_sd_topology(tl) \ + for (tl = sched_domain_topology; tl->mask; tl++) + +void set_sched_topology(struct sched_domain_topology_level *tl) +{ + sched_domain_topology = tl; +} + +#ifdef CONFIG_NUMA + static const struct cpumask *sd_numa_mask(int cpu) { return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)]; @@ -6183,7 +6186,10 @@ static void sched_init_numa(void) } } - tl = kzalloc((ARRAY_SIZE(default_topology) + level) * + /* Compute default topology size */ + for (i = 0; sched_domain_topology[i].mask; i++); + + tl = kzalloc((i + level) * sizeof(struct sched_domain_topology_level), GFP_KERNEL); if (!tl) return; @@ -6191,18 +6197,19 @@ static void sched_init_numa(void) /* * Copy the default topology bits.. */ - for (i = 0; default_topology[i].init; i++) - tl[i] = default_topology[i]; + for (i = 0; sched_domain_topology[i].mask; i++) + tl[i] = sched_domain_topology[i]; /* * .. 
and append 'j' levels of NUMA goodness. */ for (j = 0; j < level; i++, j++) { tl[i] = (struct sched_domain_topology_level){ - .init = sd_numa_init, .mask = sd_numa_mask, + .sd_flags = cpu_numa_flags, .flags = SDTL_OVERLAP, .numa_level = j, + SD_INIT_NAME(NUMA) }; } @@ -6360,7 +6367,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, const struct cpumask *cpu_map, struct sched_domain_attr *attr, struct sched_domain *child, int cpu) { - struct sched_domain *sd = tl->init(tl, cpu); + struct sched_domain *sd = sd_init(tl, cpu); if (!sd) return child; -- cgit v1.2.3-70-g09d2 From 607b45e9a216e89a63351556e488eea06be0ff48 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Fri, 11 Apr 2014 11:44:39 +0200 Subject: sched, powerpc: Create a dedicated topology table Create a dedicated topology table for handling the asymmetric features of powerpc. Signed-off-by: Vincent Guittot Reviewed-by: Preeti U Murthy Signed-off-by: Peter Zijlstra Cc: Andy Fleming Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Linus Torvalds Cc: Michael Ellerman Cc: Paul Gortmaker Cc: Paul Mackerras Cc: Preeti U. Murthy Cc: Rob Herring Cc: Srivatsa S. Bhat Cc: Toshi Kani Cc: Vasant Hegde Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com Cc: schwidefsky@de.ibm.com Cc: cmetcalf@tilera.com Cc: dietmar.eggemann@arm.com Cc: devicetree@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1397209481-28542-4-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/smp.c | 31 +++++++++++++++++++++++-------- include/linux/sched.h | 2 -- kernel/sched/core.c | 6 ------ 3 files changed, 23 insertions(+), 16 deletions(-) (limited to 'include/linux/sched.h') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e2a4232c587..10ffffef041 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -766,6 +766,28 @@ int setup_profiling_timer(unsigned int multiplier) return 0; } +#ifdef CONFIG_SCHED_SMT +/* cpumask of CPUs with asymetric SMT dependancy */ +static const int powerpc_smt_flags(void) +{ + int flags = SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES; + + if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { + printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); + flags |= SD_ASYM_PACKING; + } + return flags; +} +#endif + +static struct sched_domain_topology_level powerpc_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, +#endif + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + void __init smp_cpus_done(unsigned int max_cpus) { cpumask_var_t old_mask; @@ -790,15 +812,8 @@ void __init smp_cpus_done(unsigned int max_cpus) dump_numa_cpu_topology(); -} + set_sched_topology(powerpc_topology); -int arch_sd_sibling_asym_packing(void) -{ - if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { - printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); - return SD_ASYM_PACKING; - } - return 0; } #ifdef CONFIG_HOTPLUG_CPU diff --git a/include/linux/sched.h b/include/linux/sched.h index 656b035c30e..439a153b840 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -877,8 +877,6 @@ enum cpu_idle_type { #define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ #define SD_NUMA 0x4000 /* cross-node balancing */ -extern int __weak arch_sd_sibiling_asym_packing(void); #ifdef CONFIG_SCHED_SMT static inline const int cpu_smt_flags(void) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e59e5aec745..7e348e238bf 100644 ---
a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5796,11 +5796,6 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight); } -int __weak arch_sd_sibling_asym_packing(void) -{ - return 0*SD_ASYM_PACKING; -} - /* * Initializers for schedule domains * Non-inlined to reduce accumulated stack pressure in build_sched_domains() @@ -5981,7 +5976,6 @@ sd_init(struct sched_domain_topology_level *tl, int cpu) if (sd->flags & SD_SHARE_CPUPOWER) { sd->imbalance_pct = 110; sd->smt_gain = 1178; /* ~15% */ - sd->flags |= arch_sd_sibling_asym_packing(); } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { sd->imbalance_pct = 117; -- cgit v1.2.3-70-g09d2 From d77b3ed5c9f8ebedf154b52b5e943c461f3d37e6 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Fri, 11 Apr 2014 11:44:40 +0200 Subject: sched: Add a new SD_SHARE_POWERDOMAIN for sched_domain A new flag SD_SHARE_POWERDOMAIN is created to reflect whether groups of CPUs in a sched_domain level can or cannot reach different power states. As an example, the flag should be cleared at CPU level if groups of cores can be power gated independently. This information can be used in the load balance decision or to add a load balancing level between groups of CPUs that can power gate independently. This flag is part of the topology flags that can be set by the arch. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com Cc: schwidefsky@de.ibm.com Cc: cmetcalf@tilera.com Cc: benh@kernel.crashing.org Cc: preeti@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1397209481-28542-5-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/sched/core.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 439a153b840..accb66bfd72 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -870,6 +870,7 @@ enum cpu_idle_type { #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ #define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */ +#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ #define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7e348e238bf..1c9c3b7b26a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5261,7 +5261,8 @@ static int sd_degenerate(struct sched_domain *sd) SD_BALANCE_FORK | SD_BALANCE_EXEC | SD_SHARE_CPUPOWER | - SD_SHARE_PKG_RESOURCES)) { + SD_SHARE_PKG_RESOURCES | + SD_SHARE_POWERDOMAIN)) { if (sd->groups != sd->groups->next) return 0; } @@ -5292,7 +5293,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) SD_BALANCE_EXEC | SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES | - SD_PREFER_SIBLING); + SD_PREFER_SIBLING | + SD_SHARE_POWERDOMAIN); if (nr_node_ids == 1) pflags &= ~SD_SERIALIZE; } @@ -5901,6 +5903,7 @@ static int sched_domains_curr_level; * SD_SHARE_CPUPOWER - describes SMT topologies * SD_SHARE_PKG_RESOURCES - describes shared caches * SD_NUMA - describes NUMA topologies + * SD_SHARE_POWERDOMAIN - describes shared power
domain * * Odd one out: * SD_ASYM_PACKING - describes SMT quirks @@ -5909,7 +5912,8 @@ static int sched_domains_curr_level; (SD_SHARE_CPUPOWER | \ SD_SHARE_PKG_RESOURCES | \ SD_NUMA | \ - SD_ASYM_PACKING) + SD_ASYM_PACKING | \ + SD_SHARE_POWERDOMAIN) static struct sched_domain * sd_init(struct sched_domain_topology_level *tl, int cpu) -- cgit v1.2.3-70-g09d2 From 69dd0f848879328ae6c6f54c2ec80e49eef042d8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Apr 2014 14:30:10 +0200 Subject: sched/idle: Remove TS_POLLING support Now that there are no architectures left using it, kill the support for TS_POLLING. Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andy Lutomirski Link: http://lkml.kernel.org/n/tip-6yurip2tfix2f4bfc5agu2s0@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 46 ++-------------------------------------------- 1 file changed, 2 insertions(+), 44 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index accb66bfd72..725eef121c9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2775,51 +2775,9 @@ static inline int spin_needbreak(spinlock_t *lock) /* * Idle thread specific functions to determine the need_resched - * polling state. We have two versions, one based on TS_POLLING in - * thread_info.status and one based on TIF_POLLING_NRFLAG in - * thread_info.flags + * polling state. */ -#ifdef TS_POLLING -static inline int tsk_is_polling(struct task_struct *p) -{ - return task_thread_info(p)->status & TS_POLLING; -} -static inline void __current_set_polling(void) -{ - current_thread_info()->status |= TS_POLLING; -} - -static inline bool __must_check current_set_polling_and_test(void) -{ - __current_set_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_task() - */ - smp_mb(); - - return unlikely(tif_need_resched()); -} - -static inline void __current_clr_polling(void) -{ - current_thread_info()->status &= ~TS_POLLING; -} - -static inline bool __must_check current_clr_polling_and_test(void) -{ - __current_clr_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_task() - */ - smp_mb(); - - return unlikely(tif_need_resched()); -} -#elif defined(TIF_POLLING_NRFLAG) +#ifdef TIF_POLLING_NRFLAG static inline int tsk_is_polling(struct task_struct *p) { return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); -- cgit v1.2.3-70-g09d2 From 4027d080854d1be96ef134a1c3024d5276114db6 Mon Sep 17 00:00:00 2001 From: "xiaofeng.yan" Date: Fri, 9 May 2014 03:21:27 +0000 Subject: sched/rt: Fix 'struct sched_dl_entity' and dl_task_time() comments, to match the current upstream code Signed-off-by: xiaofeng.yan Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1399605687-18094-1-git-send-email-xiaofeng.yan@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- kernel/sched/deadline.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 725eef121c9..0f91d00efd8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1175,8 +1175,8 @@ struct sched_dl_entity { /* * Original scheduling parameters. Copied here from sched_attr - * during sched_setscheduler2(), they will remain the same until - * the next sched_setscheduler2(). + * during sched_setattr(), they will remain the same until + * the next sched_setattr(). 
*/ u64 dl_runtime; /* maximum runtime for each instance */ u64 dl_deadline; /* relative deadline of each instance */ diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index e0a04ae1e0d..f9ca7d19781 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -520,7 +520,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) * We need to take care of a possible races here. In fact, the * task might have changed its scheduling policy to something * different from SCHED_DEADLINE or changed its reservation - * parameters (through sched_setscheduler()). + * parameters (through sched_setattr()). */ if (!dl_task(p) || dl_se->dl_new) goto unlock; -- cgit v1.2.3-70-g09d2 From ad0f614e4723db8cead439cf414108cbf975b224 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Thu, 22 May 2014 11:54:20 -0700 Subject: wait: swap EXIT_ZOMBIE(Z) and EXIT_DEAD(X) chars in TASK_STATE_TO_CHAR_STR In commit ad86622b478e ("wait: swap EXIT_ZOMBIE and EXIT_DEAD to hide EXIT_TRACE from user-space") the order of task state definitions was changed: EXIT_DEAD and EXIT_ZOMBIE were swapped. However, the characters for those states in the TASK_STATE_TO_CHAR_STR string were not updated. This patch synchronizes the string with the order of the definitions. Signed-off-by: Masatake YAMATO Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 25f54c79f75..21fbdae61b9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -220,7 +220,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); #define TASK_PARKED 512 #define TASK_STATE_MAX 1024 -#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP" +#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP" extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; -- cgit v1.2.3-70-g09d2 From f98bafa06a28fdfdd5c49f820f4d6560f636fc46 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 4 Jun 2014 16:07:34 -0700 Subject: memcg: kill CONFIG_MM_OWNER CONFIG_MM_OWNER makes no sense. It is not user-selectable, it is only selected by CONFIG_MEMCG automatically. So we can kill this option in init/Kconfig and do s/CONFIG_MM_OWNER/CONFIG_MEMCG/ globally. Signed-off-by: Oleg Nesterov Acked-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- include/linux/sched.h | 4 ++-- init/Kconfig | 7 ------- kernel/exit.c | 4 ++-- kernel/fork.c | 4 ++-- 5 files changed, 7 insertions(+), 14 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8967e20cbe5..de1627232af 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -406,7 +406,7 @@ struct mm_struct { spinlock_t ioctx_lock; struct kioctx_table __rcu *ioctx_table; #endif -#ifdef CONFIG_MM_OWNER +#ifdef CONFIG_MEMCG /* * "owner" points to a task that is regarded as the canonical * user/owner of this mm.
All of the following must be true in diff --git a/include/linux/sched.h b/include/linux/sched.h index 70f67e4e615..2f2dd7d932a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2967,7 +2967,7 @@ static inline void inc_syscw(struct task_struct *tsk) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -#ifdef CONFIG_MM_OWNER +#ifdef CONFIG_MEMCG extern void mm_update_next_owner(struct mm_struct *mm); extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); #else @@ -2978,7 +2978,7 @@ static inline void mm_update_next_owner(struct mm_struct *mm) static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) { } -#endif /* CONFIG_MM_OWNER */ +#endif /* CONFIG_MEMCG */ static inline unsigned long task_rlimit(const struct task_struct *tsk, unsigned int limit) diff --git a/init/Kconfig b/init/Kconfig index 4a1822a1a68..0a2f09a80e9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -933,7 +933,6 @@ config RESOURCE_COUNTERS config MEMCG bool "Memory Resource Controller for Control Groups" depends on RESOURCE_COUNTERS - select MM_OWNER select EVENTFD help Provides a memory resource controller that manages both anonymous @@ -951,9 +950,6 @@ config MEMCG disable memory resource controller and you can avoid overheads. (and lose benefits of memory resource controller) - This config option also selects MM_OWNER config option, which - could in turn add some fork/exit overhead. - config MEMCG_SWAP bool "Memory Resource Controller Swap Extension" depends on MEMCG && SWAP @@ -1179,9 +1175,6 @@ config SCHED_AUTOGROUP desktop applications. Task group autogeneration is currently based upon task session. -config MM_OWNER - bool - config SYSFS_DEPRECATED bool "Enable deprecated sysfs features to support old userspace tools" depends on SYSFS diff --git a/kernel/exit.c b/kernel/exit.c index 6ed6a1d552b..da1b838de8a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -352,7 +352,7 @@ int disallow_signal(int sig) EXPORT_SYMBOL(disallow_signal); -#ifdef CONFIG_MM_OWNER +#ifdef CONFIG_MEMCG /* * A task is exiting. If it owned this mm, find a new owner for the mm. */ @@ -434,7 +434,7 @@ assign_new_owner: task_unlock(c); put_task_struct(c); } -#endif /* CONFIG_MM_OWNER */ +#endif /* CONFIG_MEMCG */ /* * Turn us into a lazy TLB process if we diff --git a/kernel/fork.c b/kernel/fork.c index 59e3dcc5b8f..0d53eb0dfb6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1099,12 +1099,12 @@ static void rt_mutex_init_task(struct task_struct *p) #endif } -#ifdef CONFIG_MM_OWNER +#ifdef CONFIG_MEMCG void mm_init_owner(struct mm_struct *mm, struct task_struct *p) { mm->owner = p; } -#endif /* CONFIG_MM_OWNER */ +#endif /* CONFIG_MEMCG */ /* * Initialize POSIX timer handling for a single task. -- cgit v1.2.3-70-g09d2 From b300a4ea665f7fa44f015616ac1874deca891c5e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 4 Jun 2014 16:11:27 -0700 Subject: kernel/user.c: drop unused field 'files' from user_struct Nobody seems to have used it for a long time. Let's drop it. Signed-off-by: Kirill A.
Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - kernel/user.c | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2f2dd7d932a..611676fd4c2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -745,7 +745,6 @@ static inline int signal_group_exit(const struct signal_struct *sig) struct user_struct { atomic_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have? */ - atomic_t files; /* How many open files does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ #ifdef CONFIG_INOTIFY_USER atomic_t inotify_watches; /* How many inotify watches does this user have? */ diff --git a/kernel/user.c b/kernel/user.c index 294fc6a9416..4efa39350e4 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -87,7 +87,6 @@ static DEFINE_SPINLOCK(uidhash_lock); struct user_struct root_user = { .__count = ATOMIC_INIT(1), .processes = ATOMIC_INIT(1), - .files = ATOMIC_INIT(0), .sigpending = ATOMIC_INIT(0), .locked_shm = 0, .uid = GLOBAL_ROOT_UID, -- cgit v1.2.3-70-g09d2 From 34a1b7236ad6113883f6c448d1da854cad60265e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 4 Jun 2014 16:12:19 -0700 Subject: kthreads: kill CLONE_KERNEL, change kernel_thread(kernel_init) to avoid CLONE_SIGHAND 1. Remove CLONE_KERNEL, it has no users and it is dangerous. The (old) comment says "List of flags we want to share for kernel threads" but this is not true, we do not want to share ->sighand by default. This flag can only be used if the caller is sure that both parent/child will never play with signals (say, allow_signal/etc). 2. Change rest_init() to clone kernel_init() without CLONE_SIGHAND. In this case CLONE_SIGHAND does not really hurt, and it looks like an optimization because copy_sighand() can avoid kmem_cache_alloc(). But in fact this only adds a minor pessimization. kernel_init() is going to exec the init process, and de_thread() will need to unshare ->sighand and do kmem_cache_alloc(sighand_cachep) anyway, but it needs to do more work and take tasklist_lock and siglock. Signed-off-by: Oleg Nesterov Acked-by: Peter Zijlstra Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 6 ------ init/main.c | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 611676fd4c2..8fcd0e6098d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -136,12 +136,6 @@ struct filename; #define VMACACHE_SIZE (1U << VMACACHE_BITS) #define VMACACHE_MASK (VMACACHE_SIZE - 1) -/* - * List of flags we want to share for kernel threads, - * if only because they are not used by them anyway. - */ -#define CLONE_KERNEL (CLONE_FS | CLONE_FILES | CLONE_SIGHAND) - /* * These are the constant used to fake the fixed-point load-average * counting. Some notes: diff --git a/init/main.c b/init/main.c index 8ac3833f2bd..4de815c0309 100644 --- a/init/main.c +++ b/init/main.c @@ -380,7 +380,7 @@ static noinline void __init_refok rest_init(void) * the init task will end up wanting to create kthreads, which, if * we schedule it before we create kthreadd, will OOPS.
*/ - kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); + kernel_thread(kernel_init, NULL, CLONE_FS); numa_default_policy(); pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); rcu_read_lock(); -- cgit v1.2.3-70-g09d2 From fa93384f40deeb294fd29f2fdcadbd0ebc2dedf1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 23 May 2014 13:20:42 +0300 Subject: sched: Fix signedness bug in yield_to() yield_to() is supposed to return -ESRCH if there is no task to yield to, but because the type is bool that is the same as returning true. The only place I see which cares is kvm_vcpu_on_spin(). Signed-off-by: Dan Carpenter Reviewed-by: Raghavendra Signed-off-by: Peter Zijlstra Cc: Gleb Natapov Cc: Linus Torvalds Cc: Paolo Bonzini Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/20140523102042.GA7267@mwanda Signed-off-by: Ingo Molnar --- include/linux/kvm_host.h | 2 +- include/linux/sched.h | 2 +- kernel/sched/core.c | 2 +- virt/kvm/kvm_main.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7d21cf9f438..3c4bcf14615 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -584,7 +584,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); -bool kvm_vcpu_yield_to(struct kvm_vcpu *target); +int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); diff --git a/include/linux/sched.h b/include/linux/sched.h index 0f91d00efd8..6790c3b4207 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2180,7 +2180,7 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { } static inline void sched_autogroup_exit(struct signal_struct *sig) { } #endif -extern bool yield_to(struct task_struct *p, bool preempt); +extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 321d800e4ba..afcc84234a3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4195,7 +4195,7 @@ EXPORT_SYMBOL(yield); * false (0) if we failed to boost the target. * -ESRCH if there's no task to yield to. */ -bool __sched yield_to(struct task_struct *p, bool preempt) +int __sched yield_to(struct task_struct *p, bool preempt) { struct task_struct *curr = current; struct rq *rq, *p_rq; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 56baae8c2f5..86d1c457458 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1708,11 +1708,11 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(kvm_vcpu_kick); #endif /* !CONFIG_S390 */ -bool kvm_vcpu_yield_to(struct kvm_vcpu *target) +int kvm_vcpu_yield_to(struct kvm_vcpu *target) { struct pid *pid; struct task_struct *task = NULL; - bool ret = false; + int ret = 0; rcu_read_lock(); pid = rcu_dereference(target->pid); -- cgit v1.2.3-70-g09d2 From 63b2ca30bdb3dbf60bc7ac5f46713c0d32308261 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 26 May 2014 18:19:37 -0400 Subject: sched: Let 'struct sched_group_power' care about CPU capacity It is better not to think about compute capacity as being equivalent to "CPU power". 
The upcoming "power aware" scheduler work may create confusion with the notion of energy consumption if "power" is used too liberally. Since struct sched_group_power is really about compute capacity of sched groups, let's rename it to struct sched_group_capacity. Similarly sgp becomes sgc. Related variables and functions dealing with groups are also adjusted accordingly. Signed-off-by: Nicolas Pitre Signed-off-by: Peter Zijlstra Cc: Vincent Guittot Cc: Daniel Lezcano Cc: Morten Rasmussen Cc: "Rafael J. Wysocki" Cc: linaro-kernel@lists.linaro.org Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/n/tip-5yeix833vvgf2uyj5o36hpu9@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched/core.c | 81 +++++++++++++++---------------- kernel/sched/fair.c | 131 +++++++++++++++++++++++++------------------------- kernel/sched/sched.h | 16 +++--- 4 files changed, 115 insertions(+), 115 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6790c3b4207..a96f03598c6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1013,7 +1013,7 @@ typedef const int (*sched_domain_flags_f)(void); struct sd_data { struct sched_domain **__percpu sd; struct sched_group **__percpu sg; - struct sched_group_power **__percpu sgp; + struct sched_group_capacity **__percpu sgc; }; struct sched_domain_topology_level { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index afcc84234a3..2e1fb090220 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5221,14 +5221,13 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, } /* - * Even though we initialize ->power to something semi-sane, - * we leave power_orig unset. This allows us to detect if + * Even though we initialize ->capacity to something semi-sane, + * we leave capacity_orig unset. This allows us to detect if * domain iteration is still funny without causing /0 traps. 
*/ - if (!group->sgp->power_orig) { + if (!group->sgc->capacity_orig) { printk(KERN_CONT "\n"); - printk(KERN_ERR "ERROR: domain->cpu_power not " - "set\n"); + printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n"); break; } @@ -5250,9 +5249,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); printk(KERN_CONT " %s", str); - if (group->sgp->power != SCHED_POWER_SCALE) { - printk(KERN_CONT " (cpu_power = %d)", - group->sgp->power); + if (group->sgc->capacity != SCHED_POWER_SCALE) { + printk(KERN_CONT " (cpu_capacity = %d)", + group->sgc->capacity); } group = group->next; @@ -5466,7 +5465,7 @@ static struct root_domain *alloc_rootdomain(void) return rd; } -static void free_sched_groups(struct sched_group *sg, int free_sgp) +static void free_sched_groups(struct sched_group *sg, int free_sgc) { struct sched_group *tmp, *first; @@ -5477,8 +5476,8 @@ static void free_sched_groups(struct sched_group *sg, int free_sgp) do { tmp = sg->next; - if (free_sgp && atomic_dec_and_test(&sg->sgp->ref)) - kfree(sg->sgp); + if (free_sgc && atomic_dec_and_test(&sg->sgc->ref)) + kfree(sg->sgc); kfree(sg); sg = tmp; @@ -5496,7 +5495,7 @@ static void free_sched_domain(struct rcu_head *rcu) if (sd->flags & SD_OVERLAP) { free_sched_groups(sd->groups, 1); } else if (atomic_dec_and_test(&sd->groups->ref)) { - kfree(sd->groups->sgp); + kfree(sd->groups->sgc); kfree(sd->groups); } kfree(sd); @@ -5707,17 +5706,17 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) cpumask_or(covered, covered, sg_span); - sg->sgp = *per_cpu_ptr(sdd->sgp, i); - if (atomic_inc_return(&sg->sgp->ref) == 1) + sg->sgc = *per_cpu_ptr(sdd->sgc, i); + if (atomic_inc_return(&sg->sgc->ref) == 1) build_group_mask(sd, sg); /* - * Initialize sgp->power such that even if we mess up the + * Initialize sgc->capacity such that even if we mess up the * domains and no possible iteration will get us here, we won't * die on a /0 trap. */ - sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span); - sg->sgp->power_orig = sg->sgp->power; + sg->sgc->capacity = SCHED_POWER_SCALE * cpumask_weight(sg_span); + sg->sgc->capacity_orig = sg->sgc->capacity; /* * Make sure the first group of this domain contains the @@ -5755,8 +5754,8 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) if (sg) { *sg = *per_cpu_ptr(sdd->sg, cpu); - (*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu); - atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */ + (*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu); + atomic_set(&(*sg)->sgc->ref, 1); /* for claim_allocations */ } return cpu; @@ -5819,16 +5818,16 @@ build_sched_groups(struct sched_domain *sd, int cpu) } /* - * Initialize sched groups cpu_power. + * Initialize sched groups cpu_capacity. * - * cpu_power indicates the capacity of sched group, which is used while + * cpu_capacity indicates the capacity of sched group, which is used while * distributing the load between different sched groups in a sched domain. - * Typically cpu_power for all the groups in a sched domain will be same unless - * there are asymmetries in the topology. If there are asymmetries, group - * having more cpu_power will pickup more load compared to the group having - * less cpu_power. + * Typically cpu_capacity for all the groups in a sched domain will be same + * unless there are asymmetries in the topology. 
If there are asymmetries, + * group having more cpu_capacity will pickup more load compared to the + * group having less cpu_capacity. */ -static void init_sched_groups_power(int cpu, struct sched_domain *sd) +static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) { struct sched_group *sg = sd->groups; @@ -5842,8 +5841,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) if (cpu != group_balance_cpu(sg)) return; - update_group_power(sd, cpu); - atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight); + update_group_capacity(sd, cpu); + atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight); } /* @@ -5934,8 +5933,8 @@ static void claim_allocations(int cpu, struct sched_domain *sd) if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) *per_cpu_ptr(sdd->sg, cpu) = NULL; - if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref)) - *per_cpu_ptr(sdd->sgp, cpu) = NULL; + if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref)) + *per_cpu_ptr(sdd->sgc, cpu) = NULL; } #ifdef CONFIG_NUMA @@ -6337,14 +6336,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map) if (!sdd->sg) return -ENOMEM; - sdd->sgp = alloc_percpu(struct sched_group_power *); - if (!sdd->sgp) + sdd->sgc = alloc_percpu(struct sched_group_capacity *); + if (!sdd->sgc) return -ENOMEM; for_each_cpu(j, cpu_map) { struct sched_domain *sd; struct sched_group *sg; - struct sched_group_power *sgp; + struct sched_group_capacity *sgc; sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), GFP_KERNEL, cpu_to_node(j)); @@ -6362,12 +6361,12 @@ static int __sdt_alloc(const struct cpumask *cpu_map) *per_cpu_ptr(sdd->sg, j) = sg; - sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(), + sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(), GFP_KERNEL, cpu_to_node(j)); - if (!sgp) + if (!sgc) return -ENOMEM; - *per_cpu_ptr(sdd->sgp, j) = sgp; + *per_cpu_ptr(sdd->sgc, j) = sgc; } } @@ -6394,15 +6393,15 @@ static void __sdt_free(const struct cpumask *cpu_map) if (sdd->sg) kfree(*per_cpu_ptr(sdd->sg, j)); - if (sdd->sgp) - kfree(*per_cpu_ptr(sdd->sgp, j)); + if (sdd->sgc) + kfree(*per_cpu_ptr(sdd->sgc, j)); } free_percpu(sdd->sd); sdd->sd = NULL; free_percpu(sdd->sg); sdd->sg = NULL; - free_percpu(sdd->sgp); - sdd->sgp = NULL; + free_percpu(sdd->sgc); + sdd->sgc = NULL; } } @@ -6479,7 +6478,7 @@ static int build_sched_domains(const struct cpumask *cpu_map, for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { claim_allocations(i, sd); - init_sched_groups_power(i, sd); + init_sched_groups_capacity(i, sd); } } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e401e446e87..36bd4d23fca 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4369,8 +4369,8 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, avg_load += load; } - /* Adjust by relative CPU power of the group */ - avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power; + /* Adjust by relative CPU capacity of the group */ + avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgc->capacity; if (local_group) { this_load = avg_load; @@ -5532,7 +5532,7 @@ struct sg_lb_stats { unsigned long group_load; /* Total load over the CPUs of the group */ unsigned long sum_weighted_load; /* Weighted load of group's tasks */ unsigned long load_per_task; - unsigned long group_power; + unsigned long group_capacity; unsigned int sum_nr_running; /* Nr tasks running in the group */ unsigned int group_capacity_factor; unsigned int idle_cpus; @@ -5553,7 +5553,7 @@ struct sd_lb_stats { struct 
sched_group *busiest; /* Busiest group in this sd */ struct sched_group *local; /* Local group in this sd */ unsigned long total_load; /* Total load of all groups in sd */ - unsigned long total_pwr; /* Total power of all groups in sd */ + unsigned long total_capacity; /* Total capacity of all groups in sd */ unsigned long avg_load; /* Average load across all groups in sd */ struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */ @@ -5572,7 +5572,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds) .busiest = NULL, .local = NULL, .total_load = 0UL, - .total_pwr = 0UL, + .total_capacity = 0UL, .busiest_stat = { .avg_load = 0UL, }, @@ -5681,7 +5681,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) power >>= SCHED_POWER_SHIFT; } - sdg->sgp->power_orig = power; + sdg->sgc->capacity_orig = power; if (sched_feat(ARCH_POWER)) power *= arch_scale_freq_power(sd, cpu); @@ -5697,26 +5697,26 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) power = 1; cpu_rq(cpu)->cpu_power = power; - sdg->sgp->power = power; + sdg->sgc->capacity = power; } -void update_group_power(struct sched_domain *sd, int cpu) +void update_group_capacity(struct sched_domain *sd, int cpu) { struct sched_domain *child = sd->child; struct sched_group *group, *sdg = sd->groups; - unsigned long power, power_orig; + unsigned long capacity, capacity_orig; unsigned long interval; interval = msecs_to_jiffies(sd->balance_interval); interval = clamp(interval, 1UL, max_load_balance_interval); - sdg->sgp->next_update = jiffies + interval; + sdg->sgc->next_update = jiffies + interval; if (!child) { update_cpu_power(sd, cpu); return; } - power_orig = power = 0; + capacity_orig = capacity = 0; if (child->flags & SD_OVERLAP) { /* @@ -5725,31 +5725,31 @@ void update_group_power(struct sched_domain *sd, int cpu) */ for_each_cpu(cpu, sched_group_cpus(sdg)) { - struct sched_group_power *sgp; + struct sched_group_capacity *sgc; struct rq *rq = cpu_rq(cpu); /* - * build_sched_domains() -> init_sched_groups_power() + * build_sched_domains() -> init_sched_groups_capacity() * gets here before we've attached the domains to the * runqueues. * * Use power_of(), which is set irrespective of domains * in update_cpu_power(). * - * This avoids power/power_orig from being 0 and + * This avoids capacity/capacity_orig from being 0 and * causing divide-by-zero issues on boot. * - * Runtime updates will correct power_orig. + * Runtime updates will correct capacity_orig. */ if (unlikely(!rq->sd)) { - power_orig += power_of(cpu); - power += power_of(cpu); + capacity_orig += power_of(cpu); + capacity += power_of(cpu); continue; } - sgp = rq->sd->groups->sgp; - power_orig += sgp->power_orig; - power += sgp->power; + sgc = rq->sd->groups->sgc; + capacity_orig += sgc->capacity_orig; + capacity += sgc->capacity; } } else { /* @@ -5759,14 +5759,14 @@ void update_group_power(struct sched_domain *sd, int cpu) group = child->groups; do { - power_orig += group->sgp->power_orig; - power += group->sgp->power; + capacity_orig += group->sgc->capacity_orig; + capacity += group->sgc->capacity; group = group->next; } while (group != child->groups); } - sdg->sgp->power_orig = power_orig; - sdg->sgp->power = power; + sdg->sgc->capacity_orig = capacity_orig; + sdg->sgc->capacity = capacity; } /* @@ -5786,9 +5786,9 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) return 0; /* - * If ~90% of the cpu_power is still there, we're good. + * If ~90% of the cpu_capacity is still there, we're good. 
*/ - if (group->sgp->power * 32 > group->sgp->power_orig * 29) + if (group->sgc->capacity * 32 > group->sgc->capacity_orig * 29) return 1; return 0; @@ -5825,7 +5825,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) static inline int sg_imbalanced(struct sched_group *group) { - return group->sgp->imbalance; + return group->sgc->imbalance; } /* @@ -5833,22 +5833,23 @@ static inline int sg_imbalanced(struct sched_group *group) * * Avoid the issue where N*frac(smt_power) >= 1 creates 'phantom' cores by * first dividing out the smt factor and computing the actual number of cores - * and limit power unit capacity with that. + * and limit unit capacity with that. */ static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group) { unsigned int capacity_factor, smt, cpus; - unsigned int power, power_orig; + unsigned int capacity, capacity_orig; - power = group->sgp->power; - power_orig = group->sgp->power_orig; + capacity = group->sgc->capacity; + capacity_orig = group->sgc->capacity_orig; cpus = group->group_weight; - /* smt := ceil(cpus / power), assumes: 1 < smt_power < 2 */ - smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, power_orig); + /* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */ + smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, capacity_orig); capacity_factor = cpus / smt; /* cores */ - capacity_factor = min_t(unsigned, capacity_factor, DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE)); + capacity_factor = min_t(unsigned, + capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_POWER_SCALE)); if (!capacity_factor) capacity_factor = fix_small_capacity(env->sd, group); @@ -5892,9 +5893,9 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->idle_cpus++; } - /* Adjust by relative CPU power of the group */ - sgs->group_power = group->sgp->power; - sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_power; + /* Adjust by relative CPU capacity of the group */ + sgs->group_capacity = group->sgc->capacity; + sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_capacity; if (sgs->sum_nr_running) sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; @@ -6009,8 +6010,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd sgs = &sds->local_stat; if (env->idle != CPU_NEWLY_IDLE || - time_after_eq(jiffies, sg->sgp->next_update)) - update_group_power(env->sd, env->dst_cpu); + time_after_eq(jiffies, sg->sgc->next_update)) + update_group_capacity(env->sd, env->dst_cpu); } update_sg_lb_stats(env, sg, load_idx, local_group, sgs); @@ -6040,7 +6041,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd next_group: /* Now, start updating sd_lb_stats */ sds->total_load += sgs->group_load; - sds->total_pwr += sgs->group_power; + sds->total_capacity += sgs->group_capacity; sg = sg->next; } while (sg != env->sd->groups); @@ -6087,7 +6088,7 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) return 0; env->imbalance = DIV_ROUND_CLOSEST( - sds->busiest_stat.avg_load * sds->busiest_stat.group_power, + sds->busiest_stat.avg_load * sds->busiest_stat.group_capacity, SCHED_POWER_SCALE); return 1; @@ -6103,7 +6104,7 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) static inline void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) { - unsigned long tmp, pwr_now = 0, pwr_move = 0; + unsigned long tmp, capa_now = 0, capa_move = 0; unsigned int imbn = 2; unsigned long scaled_busy_load_per_task; 
struct sg_lb_stats *local, *busiest; @@ -6118,7 +6119,7 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) scaled_busy_load_per_task = (busiest->load_per_task * SCHED_POWER_SCALE) / - busiest->group_power; + busiest->group_capacity; if (busiest->avg_load + scaled_busy_load_per_task >= local->avg_load + (scaled_busy_load_per_task * imbn)) { @@ -6132,34 +6133,34 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) * moving them. */ - pwr_now += busiest->group_power * + capa_now += busiest->group_capacity * min(busiest->load_per_task, busiest->avg_load); - pwr_now += local->group_power * + capa_now += local->group_capacity * min(local->load_per_task, local->avg_load); - pwr_now /= SCHED_POWER_SCALE; + capa_now /= SCHED_POWER_SCALE; /* Amount of load we'd subtract */ if (busiest->avg_load > scaled_busy_load_per_task) { - pwr_move += busiest->group_power * + capa_move += busiest->group_capacity * min(busiest->load_per_task, busiest->avg_load - scaled_busy_load_per_task); } /* Amount of load we'd add */ - if (busiest->avg_load * busiest->group_power < + if (busiest->avg_load * busiest->group_capacity < busiest->load_per_task * SCHED_POWER_SCALE) { - tmp = (busiest->avg_load * busiest->group_power) / - local->group_power; + tmp = (busiest->avg_load * busiest->group_capacity) / + local->group_capacity; } else { tmp = (busiest->load_per_task * SCHED_POWER_SCALE) / - local->group_power; + local->group_capacity; } - pwr_move += local->group_power * + capa_move += local->group_capacity * min(local->load_per_task, local->avg_load + tmp); - pwr_move /= SCHED_POWER_SCALE; + capa_move /= SCHED_POWER_SCALE; /* Move if we gain throughput */ - if (pwr_move > pwr_now) + if (capa_move > capa_now) env->imbalance = busiest->load_per_task; } @@ -6207,7 +6208,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s (busiest->sum_nr_running - busiest->group_capacity_factor); load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE); - load_above_capacity /= busiest->group_power; + load_above_capacity /= busiest->group_capacity; } /* @@ -6222,8 +6223,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s /* How much load to actually move to equalise the imbalance */ env->imbalance = min( - max_pull * busiest->group_power, - (sds->avg_load - local->avg_load) * local->group_power + max_pull * busiest->group_capacity, + (sds->avg_load - local->avg_load) * local->group_capacity ) / SCHED_POWER_SCALE; /* @@ -6278,7 +6279,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) if (!sds.busiest || busiest->sum_nr_running == 0) goto out_balanced; - sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr; + sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_capacity; /* * If the busiest group is imbalanced the below checks don't @@ -6611,7 +6612,7 @@ more_balance: * We failed to reach balance because of affinity. 
*/ if (sd_parent) { - int *group_imbalance = &sd_parent->groups->sgp->imbalance; + int *group_imbalance = &sd_parent->groups->sgc->imbalance; if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0) { *group_imbalance = 1; @@ -6998,7 +6999,7 @@ static inline void set_cpu_sd_state_busy(void) goto unlock; sd->nohz_idle = 0; - atomic_inc(&sd->groups->sgp->nr_busy_cpus); + atomic_inc(&sd->groups->sgc->nr_busy_cpus); unlock: rcu_read_unlock(); } @@ -7015,7 +7016,7 @@ void set_cpu_sd_state_idle(void) goto unlock; sd->nohz_idle = 1; - atomic_dec(&sd->groups->sgp->nr_busy_cpus); + atomic_dec(&sd->groups->sgc->nr_busy_cpus); unlock: rcu_read_unlock(); } @@ -7219,7 +7220,7 @@ end: * of an idle cpu is the system. * - This rq has more than one task. * - At any scheduler domain level, this cpu's scheduler group has multiple - * busy cpu's exceeding the group's power. + * busy cpu's exceeding the group's capacity. * - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler * domain span are idle. */ @@ -7227,7 +7228,7 @@ static inline int nohz_kick_needed(struct rq *rq) { unsigned long now = jiffies; struct sched_domain *sd; - struct sched_group_power *sgp; + struct sched_group_capacity *sgc; int nr_busy, cpu = rq->cpu; if (unlikely(rq->idle_balance)) @@ -7257,8 +7258,8 @@ static inline int nohz_kick_needed(struct rq *rq) sd = rcu_dereference(per_cpu(sd_busy, cpu)); if (sd) { - sgp = sd->groups->sgp; - nr_busy = atomic_read(&sgp->nr_busy_cpus); + sgc = sd->groups->sgc; + nr_busy = atomic_read(&sgc->nr_busy_cpus); if (nr_busy > 1) goto need_kick_unlock; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 600e2291a75..a5b957d53c9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -728,15 +728,15 @@ DECLARE_PER_CPU(struct sched_domain *, sd_numa); DECLARE_PER_CPU(struct sched_domain *, sd_busy); DECLARE_PER_CPU(struct sched_domain *, sd_asym); -struct sched_group_power { +struct sched_group_capacity { atomic_t ref; /* - * CPU power of this group, SCHED_LOAD_SCALE being max power for a - * single CPU. + * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity + * for a single CPU. */ - unsigned int power, power_orig; + unsigned int capacity, capacity_orig; unsigned long next_update; - int imbalance; /* XXX unrelated to power but shared group state */ + int imbalance; /* XXX unrelated to capacity but shared group state */ /* * Number of busy cpus in this group. */ @@ -750,7 +750,7 @@ struct sched_group { atomic_t ref; unsigned int group_weight; - struct sched_group_power *sgp; + struct sched_group_capacity *sgc; /* * The CPUs this group covers. @@ -773,7 +773,7 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) */ static inline struct cpumask *sched_group_mask(struct sched_group *sg) { - return to_cpumask(sg->sgp->cpumask); + return to_cpumask(sg->sgc->cpumask); } /** @@ -1167,7 +1167,7 @@ extern const struct sched_class idle_sched_class; #ifdef CONFIG_SMP -extern void update_group_power(struct sched_domain *sd, int cpu); +extern void update_group_capacity(struct sched_domain *sd, int cpu); extern void trigger_load_balance(struct rq *rq); -- cgit v1.2.3-70-g09d2 From ca8ce3d0b144c318a5a9ce99649053e9029061ea Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 26 May 2014 18:19:39 -0400 Subject: sched: Final power vs. capacity cleanups It is better not to think about compute capacity as being equivalent to "CPU power". 
The upcoming "power aware" scheduler work may create confusion with the notion of energy consumption if "power" is used too liberally. This contains the architecture visible changes. Incidentally, only ARM takes advantage of the available pow^H^H^Hcapacity scaling hooks and therefore those changes outside kernel/sched/ are confined to one ARM specific file. The default arch_scale_smt_power() hook is not overridden by anyone. Replacements are as follows: arch_scale_freq_power --> arch_scale_freq_capacity arch_scale_smt_power --> arch_scale_smt_capacity SCHED_POWER_SCALE --> SCHED_CAPACITY_SCALE SCHED_POWER_SHIFT --> SCHED_CAPACITY_SHIFT The local usage of "power" in arch/arm/kernel/topology.c is also changed to "capacity" as appropriate. Signed-off-by: Nicolas Pitre Signed-off-by: Peter Zijlstra Cc: Vincent Guittot Cc: Daniel Lezcano Cc: Morten Rasmussen Cc: "Rafael J. Wysocki" Cc: linaro-kernel@lists.linaro.org Cc: Arnd Bergmann Cc: Dietmar Eggemann Cc: Grant Likely Cc: Linus Torvalds Cc: Mark Brown Cc: Rob Herring Cc: Russell King Cc: Sudeep KarkadaNagesha Cc: Vincent Guittot Cc: devicetree@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/n/tip-48zba9qbznvglwelgq2cfygh@git.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/kernel/topology.c | 54 +++++++++++++++++++++--------------------- include/linux/sched.h | 6 ++--- kernel/sched/core.c | 6 ++--- kernel/sched/fair.c | 59 +++++++++++++++++++++++----------------------- 4 files changed, 63 insertions(+), 62 deletions(-) (limited to 'include/linux/sched.h') diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 71e1fec6d31..d42a7db2223 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -26,30 +26,30 @@ #include /* - * cpu power scale management + * cpu capacity scale management */ /* - * cpu power table + * cpu capacity table * This per cpu data structure describes the relative capacity of each core. * On a heteregenous system, cores don't have the same computation capacity - * and we reflect that difference in the cpu_power field so the scheduler can - * take this difference into account during load balance. A per cpu structure - * is preferred because each CPU updates its own cpu_power field during the - * load balance except for idle cores. One idle core is selected to run the - * rebalance_domains for all idle cores and the cpu_power can be updated - * during this sequence. + * and we reflect that difference in the cpu_capacity field so the scheduler + * can take this difference into account during load balance. A per cpu + * structure is preferred because each CPU updates its own cpu_capacity field + * during the load balance except for idle cores. One idle core is selected + * to run the rebalance_domains for all idle cores and the cpu_capacity can be + * updated during this sequence. 
*/ static DEFINE_PER_CPU(unsigned long, cpu_scale); -unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu) { return per_cpu(cpu_scale, cpu); } -static void set_power_scale(unsigned int cpu, unsigned long power) +static void set_capacity_scale(unsigned int cpu, unsigned long capacity) { - per_cpu(cpu_scale, cpu) = power; + per_cpu(cpu_scale, cpu) = capacity; } #ifdef CONFIG_OF @@ -62,11 +62,11 @@ struct cpu_efficiency { * Table of relative efficiency of each processors * The efficiency value must fit in 20bit and the final * cpu_scale value must be in the range - * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2 + * 0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2 * in order to return at most 1 when DIV_ROUND_CLOSEST * is used to compute the capacity of a CPU. * Processors that are not defined in the table, - * use the default SCHED_POWER_SCALE value for cpu_scale. + * use the default SCHED_CAPACITY_SCALE value for cpu_scale. */ static const struct cpu_efficiency table_efficiency[] = { {"arm,cortex-a15", 3891}, @@ -83,9 +83,9 @@ static unsigned long middle_capacity = 1; * Iterate all CPUs' descriptor in DT and compute the efficiency * (as per table_efficiency). Also calculate a middle efficiency * as close as possible to (max{eff_i} - min{eff_i}) / 2 - * This is later used to scale the cpu_power field such that an - * 'average' CPU is of middle power. Also see the comments near - * table_efficiency[] and update_cpu_power(). + * This is later used to scale the cpu_capacity field such that an + * 'average' CPU is of middle capacity. Also see the comments near + * table_efficiency[] and update_cpu_capacity(). */ static void __init parse_dt_topology(void) { @@ -141,15 +141,15 @@ static void __init parse_dt_topology(void) * cpu_scale because all CPUs have the same capacity. Otherwise, we * compute a middle_capacity factor that will ensure that the capacity * of an 'average' CPU of the system will be as close as possible to - * SCHED_POWER_SCALE, which is the default value, but with the + * SCHED_CAPACITY_SCALE, which is the default value, but with the * constraint explained near table_efficiency[]. */ if (4*max_capacity < (3*(max_capacity + min_capacity))) middle_capacity = (min_capacity + max_capacity) - >> (SCHED_POWER_SHIFT+1); + >> (SCHED_CAPACITY_SHIFT+1); else middle_capacity = ((max_capacity / 3) - >> (SCHED_POWER_SHIFT-1)) + 1; + >> (SCHED_CAPACITY_SHIFT-1)) + 1; } @@ -158,20 +158,20 @@ static void __init parse_dt_topology(void) * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the * function returns directly for SMP system. 
*/ -static void update_cpu_power(unsigned int cpu) +static void update_cpu_capacity(unsigned int cpu) { if (!cpu_capacity(cpu)) return; - set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity); + set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity); - printk(KERN_INFO "CPU%u: update cpu_power %lu\n", - cpu, arch_scale_freq_power(NULL, cpu)); + printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n", + cpu, arch_scale_freq_capacity(NULL, cpu)); } #else static inline void parse_dt_topology(void) {} -static inline void update_cpu_power(unsigned int cpuid) {} +static inline void update_cpu_capacity(unsigned int cpuid) {} #endif /* @@ -267,7 +267,7 @@ void store_cpu_topology(unsigned int cpuid) update_siblings_masks(cpuid); - update_cpu_power(cpuid); + update_cpu_capacity(cpuid); printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", cpuid, cpu_topology[cpuid].thread_id, @@ -297,7 +297,7 @@ void __init init_cpu_topology(void) { unsigned int cpu; - /* init core mask and power*/ + /* init core mask and capacity */ for_each_possible_cpu(cpu) { struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]); @@ -307,7 +307,7 @@ void __init init_cpu_topology(void) cpumask_clear(&cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); - set_power_scale(cpu, SCHED_POWER_SCALE); + set_capacity_scale(cpu, SCHED_CAPACITY_SCALE); } smp_wmb(); diff --git a/include/linux/sched.h b/include/linux/sched.h index a96f03598c6..322110affe6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -854,10 +854,10 @@ enum cpu_idle_type { }; /* - * Increase resolution of cpu_power calculations + * Increase resolution of cpu_capacity calculations */ -#define SCHED_POWER_SHIFT 10 -#define SCHED_POWER_SCALE (1L << SCHED_POWER_SHIFT) +#define SCHED_CAPACITY_SHIFT 10 +#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) /* * sched-domains (multiprocessor balancing) declarations: diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 07bc78a5032..7ba4f5413a1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5249,7 +5249,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); printk(KERN_CONT " %s", str); - if (group->sgc->capacity != SCHED_POWER_SCALE) { + if (group->sgc->capacity != SCHED_CAPACITY_SCALE) { printk(KERN_CONT " (cpu_capacity = %d)", group->sgc->capacity); } @@ -5715,7 +5715,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) * domains and no possible iteration will get us here, we won't * die on a /0 trap. 
*/ - sg->sgc->capacity = SCHED_POWER_SCALE * cpumask_weight(sg_span); + sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span); sg->sgc->capacity_orig = sg->sgc->capacity; /* @@ -6921,7 +6921,7 @@ void __init sched_init(void) #ifdef CONFIG_SMP rq->sd = NULL; rq->rd = NULL; - rq->cpu_capacity = SCHED_POWER_SCALE; + rq->cpu_capacity = SCHED_CAPACITY_SCALE; rq->post_schedule = 0; rq->active_balance = 0; rq->next_balance = jiffies; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 58684f684fa..dc7d6527a28 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1062,9 +1062,9 @@ static void update_numa_stats(struct numa_stats *ns, int nid) if (!cpus) return; - ns->load = (ns->load * SCHED_POWER_SCALE) / ns->compute_capacity; + ns->load = (ns->load * SCHED_CAPACITY_SCALE) / ns->compute_capacity; ns->task_capacity = - DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_POWER_SCALE); + DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE); ns->has_free_capacity = (ns->nr_running < ns->task_capacity); } @@ -4370,7 +4370,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, } /* Adjust by relative CPU capacity of the group */ - avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgc->capacity; + avg_load = (avg_load * SCHED_CAPACITY_SCALE) / group->sgc->capacity; if (local_group) { this_load = avg_load; @@ -5609,10 +5609,10 @@ static inline int get_sd_load_idx(struct sched_domain *sd, static unsigned long default_scale_capacity(struct sched_domain *sd, int cpu) { - return SCHED_POWER_SCALE; + return SCHED_CAPACITY_SCALE; } -unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) +unsigned long __weak arch_scale_freq_capacity(struct sched_domain *sd, int cpu) { return default_scale_capacity(sd, cpu); } @@ -5627,7 +5627,7 @@ static unsigned long default_scale_smt_capacity(struct sched_domain *sd, int cpu return smt_gain; } -unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) +unsigned long __weak arch_scale_smt_capacity(struct sched_domain *sd, int cpu) { return default_scale_smt_capacity(sd, cpu); } @@ -5658,10 +5658,10 @@ static unsigned long scale_rt_capacity(int cpu) available = total - avg; } - if (unlikely((s64)total < SCHED_POWER_SCALE)) - total = SCHED_POWER_SCALE; + if (unlikely((s64)total < SCHED_CAPACITY_SCALE)) + total = SCHED_CAPACITY_SCALE; - total >>= SCHED_POWER_SHIFT; + total >>= SCHED_CAPACITY_SHIFT; return div_u64(available, total); } @@ -5669,29 +5669,29 @@ static unsigned long scale_rt_capacity(int cpu) static void update_cpu_capacity(struct sched_domain *sd, int cpu) { unsigned long weight = sd->span_weight; - unsigned long capacity = SCHED_POWER_SCALE; + unsigned long capacity = SCHED_CAPACITY_SCALE; struct sched_group *sdg = sd->groups; if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { if (sched_feat(ARCH_POWER)) - capacity *= arch_scale_smt_power(sd, cpu); + capacity *= arch_scale_smt_capacity(sd, cpu); else capacity *= default_scale_smt_capacity(sd, cpu); - capacity >>= SCHED_POWER_SHIFT; + capacity >>= SCHED_CAPACITY_SHIFT; } sdg->sgc->capacity_orig = capacity; if (sched_feat(ARCH_POWER)) - capacity *= arch_scale_freq_power(sd, cpu); + capacity *= arch_scale_freq_capacity(sd, cpu); else capacity *= default_scale_capacity(sd, cpu); - capacity >>= SCHED_POWER_SHIFT; + capacity >>= SCHED_CAPACITY_SHIFT; capacity *= scale_rt_capacity(cpu); - capacity >>= SCHED_POWER_SHIFT; + capacity >>= SCHED_CAPACITY_SHIFT; if (!capacity) capacity = 1; @@ -5780,7 +5780,7 @@ static inline int 
fix_small_capacity(struct sched_domain *sd, struct sched_group *group) { /* - * Only siblings can have significantly less than SCHED_POWER_SCALE + * Only siblings can have significantly less than SCHED_CAPACITY_SCALE */ if (!(sd->flags & SD_SHARE_CPUPOWER)) return 0; @@ -5845,11 +5845,11 @@ static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *gro cpus = group->group_weight; /* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */ - smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, capacity_orig); + smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, capacity_orig); capacity_factor = cpus / smt; /* cores */ capacity_factor = min_t(unsigned, - capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_POWER_SCALE)); + capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE)); if (!capacity_factor) capacity_factor = fix_small_capacity(env->sd, group); @@ -5895,7 +5895,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, /* Adjust by relative CPU capacity of the group */ sgs->group_capacity = group->sgc->capacity; - sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_capacity; + sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity; if (sgs->sum_nr_running) sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; @@ -6089,7 +6089,7 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) env->imbalance = DIV_ROUND_CLOSEST( sds->busiest_stat.avg_load * sds->busiest_stat.group_capacity, - SCHED_POWER_SCALE); + SCHED_CAPACITY_SCALE); return 1; } @@ -6118,7 +6118,7 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) imbn = 1; scaled_busy_load_per_task = - (busiest->load_per_task * SCHED_POWER_SCALE) / + (busiest->load_per_task * SCHED_CAPACITY_SCALE) / busiest->group_capacity; if (busiest->avg_load + scaled_busy_load_per_task >= @@ -6137,7 +6137,7 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) min(busiest->load_per_task, busiest->avg_load); capa_now += local->group_capacity * min(local->load_per_task, local->avg_load); - capa_now /= SCHED_POWER_SCALE; + capa_now /= SCHED_CAPACITY_SCALE; /* Amount of load we'd subtract */ if (busiest->avg_load > scaled_busy_load_per_task) { @@ -6148,16 +6148,16 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) /* Amount of load we'd add */ if (busiest->avg_load * busiest->group_capacity < - busiest->load_per_task * SCHED_POWER_SCALE) { + busiest->load_per_task * SCHED_CAPACITY_SCALE) { tmp = (busiest->avg_load * busiest->group_capacity) / local->group_capacity; } else { - tmp = (busiest->load_per_task * SCHED_POWER_SCALE) / + tmp = (busiest->load_per_task * SCHED_CAPACITY_SCALE) / local->group_capacity; } capa_move += local->group_capacity * min(local->load_per_task, local->avg_load + tmp); - capa_move /= SCHED_POWER_SCALE; + capa_move /= SCHED_CAPACITY_SCALE; /* Move if we gain throughput */ if (capa_move > capa_now) @@ -6207,7 +6207,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s load_above_capacity = (busiest->sum_nr_running - busiest->group_capacity_factor); - load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE); + load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_CAPACITY_SCALE); load_above_capacity /= busiest->group_capacity; } @@ -6225,7 +6225,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s env->imbalance = min( max_pull * busiest->group_capacity, (sds->avg_load - local->avg_load) * local->group_capacity - 
) / SCHED_POWER_SCALE; + ) / SCHED_CAPACITY_SCALE; /* * if *imbalance is less than the average load per runnable task @@ -6279,7 +6279,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env) if (!sds.busiest || busiest->sum_nr_running == 0) goto out_balanced; - sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_capacity; + sds.avg_load = (SCHED_CAPACITY_SCALE * sds.total_load) + / sds.total_capacity; /* * If the busiest group is imbalanced the below checks don't @@ -6378,7 +6379,7 @@ static struct rq *find_busiest_queue(struct lb_env *env, continue; capacity = capacity_of(i); - capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_POWER_SCALE); + capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE); if (!capacity_factor) capacity_factor = fix_small_capacity(env->sd, group); -- cgit v1.2.3-70-g09d2 From 5d4dfddd4f02b028d6ddaaa04d75d3b0cad1c9ae Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 27 May 2014 13:50:41 -0400 Subject: sched: Rename capacity related flags It is better not to think about compute capacity as being equivalent to "CPU power". The upcoming "power aware" scheduler work may create confusion with the notion of energy consumption if "power" is used too liberally. Let's rename the following feature flags since they do relate to capacity: SD_SHARE_CPUPOWER -> SD_SHARE_CPUCAPACITY ARCH_POWER -> ARCH_CAPACITY NONTASK_POWER -> NONTASK_CAPACITY Signed-off-by: Nicolas Pitre Signed-off-by: Peter Zijlstra Cc: Vincent Guittot Cc: Daniel Lezcano Cc: Morten Rasmussen Cc: "Rafael J. Wysocki" Cc: linaro-kernel@lists.linaro.org Cc: Andy Fleming Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Linus Torvalds Cc: Michael Ellerman Cc: Paul Gortmaker Cc: Paul Mackerras Cc: Preeti U Murthy Cc: Rob Herring Cc: Srivatsa S. 
Bhat Cc: Toshi Kani Cc: Vasant Hegde Cc: Vincent Guittot Cc: devicetree@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/n/tip-e93lpnxb87owfievqatey6b5@git.kernel.org Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/smp.c | 2 +- include/linux/sched.h | 4 ++-- kernel/sched/core.c | 14 +++++++------- kernel/sched/fair.c | 8 ++++---- kernel/sched/features.h | 8 ++++---- 5 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux/sched.h') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 10ffffef041..c51d16379cb 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -770,7 +770,7 @@ int setup_profiling_timer(unsigned int multiplier) /* cpumask of CPUs with asymetric SMT dependancy */ static const int powerpc_smt_flags(void) { - int flags = SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES; + int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); diff --git a/include/linux/sched.h b/include/linux/sched.h index 322110affe6..ce93768a331 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -869,7 +869,7 @@ enum cpu_idle_type { #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ -#define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */ +#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu power */ #define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ @@ -881,7 +881,7 @@ enum cpu_idle_type { #ifdef CONFIG_SCHED_SMT static inline const int cpu_smt_flags(void) { - return SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES; + return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; } #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7ba4f5413a1..5976ca579d3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -872,7 +872,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) rq->clock_task += delta; #if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) - if ((irq_delta + steal) && sched_feat(NONTASK_POWER)) + if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY)) sched_rt_avg_update(rq, irq_delta + steal); #endif } @@ -5309,7 +5309,7 @@ static int sd_degenerate(struct sched_domain *sd) SD_BALANCE_NEWIDLE | SD_BALANCE_FORK | SD_BALANCE_EXEC | - SD_SHARE_CPUPOWER | + SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN)) { if (sd->groups != sd->groups->next) @@ -5340,7 +5340,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) SD_BALANCE_NEWIDLE | SD_BALANCE_FORK | SD_BALANCE_EXEC | - SD_SHARE_CPUPOWER | + SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_PREFER_SIBLING | SD_SHARE_POWERDOMAIN); @@ -5947,7 +5947,7 @@ static int sched_domains_curr_level; /* * SD_flags allowed in topology descriptions. 
* - * SD_SHARE_CPUPOWER - describes SMT topologies + * SD_SHARE_CPUCAPACITY - describes SMT topologies * SD_SHARE_PKG_RESOURCES - describes shared caches * SD_NUMA - describes NUMA topologies * SD_SHARE_POWERDOMAIN - describes shared power domain @@ -5956,7 +5956,7 @@ static int sched_domains_curr_level; * SD_ASYM_PACKING - describes SMT quirks */ #define TOPOLOGY_SD_FLAGS \ - (SD_SHARE_CPUPOWER | \ + (SD_SHARE_CPUCAPACITY | \ SD_SHARE_PKG_RESOURCES | \ SD_NUMA | \ SD_ASYM_PACKING | \ @@ -6002,7 +6002,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu) | 1*SD_BALANCE_FORK | 0*SD_BALANCE_WAKE | 1*SD_WAKE_AFFINE - | 0*SD_SHARE_CPUPOWER + | 0*SD_SHARE_CPUCAPACITY | 0*SD_SHARE_PKG_RESOURCES | 0*SD_SERIALIZE | 0*SD_PREFER_SIBLING @@ -6024,7 +6024,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu) * Convert topological properties into behaviour. */ - if (sd->flags & SD_SHARE_CPUPOWER) { + if (sd->flags & SD_SHARE_CPUCAPACITY) { sd->imbalance_pct = 110; sd->smt_gain = 1178; /* ~15% */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index dc7d6527a28..d3c73122219 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5672,8 +5672,8 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) unsigned long capacity = SCHED_CAPACITY_SCALE; struct sched_group *sdg = sd->groups; - if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { - if (sched_feat(ARCH_POWER)) + if ((sd->flags & SD_SHARE_CPUCAPACITY) && weight > 1) { + if (sched_feat(ARCH_CAPACITY)) capacity *= arch_scale_smt_capacity(sd, cpu); else capacity *= default_scale_smt_capacity(sd, cpu); @@ -5683,7 +5683,7 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) sdg->sgc->capacity_orig = capacity; - if (sched_feat(ARCH_POWER)) + if (sched_feat(ARCH_CAPACITY)) capacity *= arch_scale_freq_capacity(sd, cpu); else capacity *= default_scale_capacity(sd, cpu); @@ -5782,7 +5782,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) /* * Only siblings can have significantly less than SCHED_CAPACITY_SCALE */ - if (!(sd->flags & SD_SHARE_CPUPOWER)) + if (!(sd->flags & SD_SHARE_CPUCAPACITY)) return 0; /* diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 5716929a2e3..90284d117fe 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -37,18 +37,18 @@ SCHED_FEAT(CACHE_HOT_BUDDY, true) SCHED_FEAT(WAKEUP_PREEMPTION, true) /* - * Use arch dependent cpu power functions + * Use arch dependent cpu capacity functions */ -SCHED_FEAT(ARCH_POWER, true) +SCHED_FEAT(ARCH_CAPACITY, true) SCHED_FEAT(HRTICK, false) SCHED_FEAT(DOUBLE_TICK, false) SCHED_FEAT(LB_BIAS, true) /* - * Decrement CPU power based on time not spent running tasks + * Decrement CPU capacity based on time not spent running tasks */ -SCHED_FEAT(NONTASK_POWER, true) +SCHED_FEAT(NONTASK_CAPACITY, true) /* * Queue remote wakeups on the target CPU and process them -- cgit v1.2.3-70-g09d2 From 82b897782d10fcc4930c9d4a15b175348fdd2871 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 28 May 2014 11:45:04 +0300 Subject: perf: Differentiate exec() and non-exec() comm events perf tools like 'perf report' can aggregate samples by comm strings, which generally works. However, there are other potential use-cases. For example, to pair up 'calls' with 'returns' accurately (from branch events like Intel BTS) it is necessary to identify whether the process has exec'd. Although a comm event is generated when an 'exec' happens it is also generated whenever the comm string is changed on a whim (e.g. 
by prctl PR_SET_NAME). This patch adds a flag to the comm event to differentiate one case from the other. In order to determine whether the kernel supports the new flag, a selection bit named 'exec' is added to struct perf_event_attr. The bit does nothing but will cause perf_event_open() to fail if the bit is set on kernels that do not have it defined. Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/537D9EBE.7030806@intel.com Cc: Paul Mackerras Cc: Dave Jones Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Alexander Viro Cc: Linus Torvalds Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/exec.c | 6 +++--- include/linux/perf_event.h | 4 ++-- include/linux/sched.h | 6 +++++- include/uapi/linux/perf_event.h | 9 +++++++-- kernel/events/core.c | 4 ++-- 5 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include/linux/sched.h') diff --git a/fs/exec.c b/fs/exec.c index a038a41a367..a3d33fe592d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1046,13 +1046,13 @@ EXPORT_SYMBOL_GPL(get_task_comm); * so that a new one can be started */ -void set_task_comm(struct task_struct *tsk, const char *buf) +void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) { task_lock(tsk); trace_task_rename(tsk, buf); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); - perf_event_comm(tsk); + perf_event_comm(tsk, exec); } int flush_old_exec(struct linux_binprm * bprm) @@ -1111,7 +1111,7 @@ void setup_new_exec(struct linux_binprm * bprm) set_dumpable(current->mm, suid_dumpable); perf_event_exec(); - set_task_comm(current, kbasename(bprm->filename)); + __set_task_comm(current, kbasename(bprm->filename), true); /* Set the new mm task size. 
We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b4c1d4685bf..707617a8c0f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -707,7 +707,7 @@ extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks * extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern void perf_event_exec(void); -extern void perf_event_comm(struct task_struct *tsk); +extern void perf_event_comm(struct task_struct *tsk, bool exec); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ @@ -815,7 +815,7 @@ static inline int perf_unregister_guest_info_callbacks static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_exec(void) { } -static inline void perf_event_comm(struct task_struct *tsk) { } +static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 221b2bde372..ad86e1d7dbc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2379,7 +2379,11 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -extern void set_task_comm(struct task_struct *tsk, const char *from); +extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); +static inline void set_task_comm(struct task_struct *tsk, const char *from) +{ + __set_task_comm(tsk, from, false); +} extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index d9cd853818a..5312fae4721 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -302,8 +302,8 @@ struct perf_event_attr { exclude_callchain_kernel : 1, /* exclude kernel callchains */ exclude_callchain_user : 1, /* exclude user callchains */ mmap2 : 1, /* include mmap with inode data */ - - __reserved_1 : 40; + comm_exec : 1, /* flag comm events that are due to an exec */ + __reserved_1 : 39; union { __u32 wakeup_events; /* wakeup every n events */ @@ -502,7 +502,12 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) #define PERF_RECORD_MISC_GUEST_USER (5 << 0) +/* + * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on + * different events so can reuse the same bit position. + */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) +#define PERF_RECORD_MISC_COMM_EXEC (1 << 13) /* * Indicates that the content of PERF_SAMPLE_IP points to * the actual instruction that triggered the event. See also diff --git a/kernel/events/core.c b/kernel/events/core.c index 8fac2056d51..7da5e561e89 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5090,7 +5090,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) NULL); } -void perf_event_comm(struct task_struct *task) +void perf_event_comm(struct task_struct *task, bool exec) { struct perf_comm_event comm_event; @@ -5104,7 +5104,7 @@ void perf_event_comm(struct task_struct *task) .event_id = { .header = { .type = PERF_RECORD_COMM, - .misc = 0, + .misc = exec ? 
PERF_RECORD_MISC_COMM_EXEC : 0, /* .size */ }, /* .pid */ -- cgit v1.2.3-70-g09d2 From 0341729b4b832e753c5e745c6ba0e797f6198be0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Jun 2014 14:36:53 -0700 Subject: signals: mv {dis,}allow_signal() from sched.h/exit.c to signal.[ch] Move the declaration/definition of allow_signal/disallow_signal to signal.h/signal.c. The new place is more logical and allows to use the static helpers in signal.c (see the next changes). While at it, make them return void and remove the valid_signal() check. Nobody checks the returned value, and in-kernel users must not pass the wrong signal number. Signed-off-by: Oleg Nesterov Cc: Peter Zijlstra Cc: Al Viro Cc: David Woodhouse Cc: Frederic Weisbecker Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Mathieu Desnoyers Cc: Richard Weinberger Cc: Steven Rostedt Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 --- include/linux/signal.h | 2 ++ kernel/exit.c | 39 --------------------------------------- kernel/signal.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 31 insertions(+), 42 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8fcd0e6098d..ea74596014a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2414,9 +2414,6 @@ extern void flush_itimer_signals(void); extern void do_group_exit(int); -extern int allow_signal(int); -extern int disallow_signal(int); - extern int do_execve(struct filename *, const char __user * const __user *, const char __user * const __user *); diff --git a/include/linux/signal.h b/include/linux/signal.h index ae744c31463..ac83c593f4b 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -284,6 +284,8 @@ extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping); extern void exit_signals(struct task_struct *tsk); +extern void allow_signal(int); +extern void disallow_signal(int); /* * Eventually that'll replace get_signal_to_deliver(); macro for now, diff --git a/kernel/exit.c b/kernel/exit.c index 750c2e59461..e5c4668f179 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -313,45 +313,6 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) } } -/* - * Let kernel threads use this to say that they allow a certain signal. - * Must not be used if kthread was cloned with CLONE_SIGHAND. - */ -int allow_signal(int sig) -{ - if (!valid_signal(sig) || sig < 1) - return -EINVAL; - - spin_lock_irq(¤t->sighand->siglock); - /* This is only needed for daemonize()'ed kthreads */ - sigdelset(¤t->blocked, sig); - /* - * Kernel threads handle their own signals. Let the signal code - * know it'll be handled, so that they don't get converted to - * SIGKILL or just silently dropped. - */ - current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - return 0; -} - -EXPORT_SYMBOL(allow_signal); - -int disallow_signal(int sig) -{ - if (!valid_signal(sig) || sig < 1) - return -EINVAL; - - spin_lock_irq(¤t->sighand->siglock); - current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - return 0; -} - -EXPORT_SYMBOL(disallow_signal); - #ifdef CONFIG_MEMCG /* * A task is exiting. 
If it owned this mm, find a new owner for the mm. diff --git a/kernel/signal.c b/kernel/signal.c index a6d8c3af0ad..7d6ff8b1850 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3066,6 +3066,35 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo, } #endif +/* + * Let kernel threads use this to say that they allow a certain signal. + * Must not be used if kthread was cloned with CLONE_SIGHAND. + */ +void allow_signal(int sig) +{ + spin_lock_irq(¤t->sighand->siglock); + /* This is only needed for daemonize()'ed kthreads */ + sigdelset(¤t->blocked, sig); + /* + * Kernel threads handle their own signals. Let the signal code + * know it'll be handled, so that they don't get converted to + * SIGKILL or just silently dropped. + */ + current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); +} +EXPORT_SYMBOL(allow_signal); + +void disallow_signal(int sig) +{ + spin_lock_irq(¤t->sighand->siglock); + current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); +} +EXPORT_SYMBOL(disallow_signal); + int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) { struct task_struct *p = current, *t; -- cgit v1.2.3-70-g09d2 From b6220ad66bcd4a50737eb3c08e9466aa44f3bc98 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 24 Jun 2014 18:05:29 -0700 Subject: sched: Fix compiler warnings Commit 143e1e28cb (sched: Rework sched_domain topology definition) introduced a number of functions with a return value of 'const int'. gcc doesn't know what to do with that and, if the kernel is compiled with W=1, complains with the following warnings whenever sched.h is included. include/linux/sched.h:875:25: warning: type qualifiers ignored on function return type include/linux/sched.h:882:25: warning: type qualifiers ignored on function return type include/linux/sched.h:889:25: warning: type qualifiers ignored on function return type include/linux/sched.h:1002:21: warning: type qualifiers ignored on function return type Commits fb2aa855 (sched, ARM: Create a dedicated scheduler topology table) and 607b45e9a (sched, powerpc: Create a dedicated topology table) introduce the same warning in the arm and powerpc code. Drop 'const' from the function declarations to fix the problem. The fix for all three patches has to be applied together to avoid compilation failures for the affected architectures. 
Acked-by: Vincent Guittot Acked-by: Benjamin Herrenschmidt Signed-off-by: Guenter Roeck Cc: Russell King Cc: Paul Mackerras Cc: Dietmar Eggemann Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1403658329-13196-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar --- arch/arm/kernel/topology.c | 2 +- arch/powerpc/kernel/smp.c | 2 +- include/linux/sched.h | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux/sched.h') diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 9d853189028..e35d880f977 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -275,7 +275,7 @@ void store_cpu_topology(unsigned int cpuid) cpu_topology[cpuid].socket_id, mpidr); } -static inline const int cpu_corepower_flags(void) +static inline int cpu_corepower_flags(void) { return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 51a3ff78838..1007fb802e6 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -747,7 +747,7 @@ int setup_profiling_timer(unsigned int multiplier) #ifdef CONFIG_SCHED_SMT /* cpumask of CPUs with asymetric SMT dependancy */ -static const int powerpc_smt_flags(void) +static int powerpc_smt_flags(void) { int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; diff --git a/include/linux/sched.h b/include/linux/sched.h index 306f4f0c987..0376b054a0d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -872,21 +872,21 @@ enum cpu_idle_type { #define SD_NUMA 0x4000 /* cross-node balancing */ #ifdef CONFIG_SCHED_SMT -static inline const int cpu_smt_flags(void) +static inline int cpu_smt_flags(void) { return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; } #endif #ifdef CONFIG_SCHED_MC -static inline const int cpu_core_flags(void) +static inline int cpu_core_flags(void) { return SD_SHARE_PKG_RESOURCES; } #endif #ifdef CONFIG_NUMA -static inline const int cpu_numa_flags(void) +static inline int cpu_numa_flags(void) { return SD_NUMA; } @@ -999,7 +999,7 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); bool cpus_share_cache(int this_cpu, int that_cpu); typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); -typedef const int (*sched_domain_flags_f)(void); +typedef int (*sched_domain_flags_f)(void); #define SDTL_OVERLAP 0x01 -- cgit v1.2.3-70-g09d2
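
An illustrative aside on the final patch above, outside the patch series itself: the warning being silenced is gcc's -Wignored-qualifiers (pulled in by -Wextra, which a W=1 kernel build enables), since a 'const' qualifier on a scalar return type has no effect on the caller. The snippet below is a hedged, standalone userspace sketch rather than kernel code; the flag value is copied from the sched.h hunks above, and the helper names are hypothetical stand-ins for cpu_core_flags()/sched_domain_flags_f.

/*
 * Minimal reproduction of the "type qualifiers ignored on function
 * return type" warning and its fix (userspace sketch, not kernel code).
 * Build with: gcc -Wextra -o flags flags.c
 */
#include <stdio.h>

#define SD_SHARE_PKG_RESOURCES 0x0200	/* value taken from the sched.h hunk above */

/* Before the fix: the 'const' on the return type triggers the warning. */
static const int cpu_core_flags_before(void)
{
	return SD_SHARE_PKG_RESOURCES;
}

/* After the fix: plain 'int' return type, identical behaviour, no warning. */
static int cpu_core_flags_after(void)
{
	return SD_SHARE_PKG_RESOURCES;
}

/* The function-pointer typedef loses the qualifier the same way. */
typedef int (*sched_domain_flags_f)(void);

int main(void)
{
	sched_domain_flags_f flags_fn = cpu_core_flags_after;

	printf("topology flags: %#x\n", (unsigned int)flags_fn());
	printf("old-style helper returns: %#x\n", (unsigned int)cpu_core_flags_before());
	return 0;
}

Assigning cpu_core_flags_before (with its const-qualified return type) to the cleaned-up sched_domain_flags_f pointer would draw an incompatible-pointer-type diagnostic, which is presumably why the commit message stresses that the sched.h, ARM and powerpc changes have to land together.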