diff options
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 266 |
1 files changed, 170 insertions, 96 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index eaf6751e761..554de400980 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -74,6 +74,8 @@ #include <asm/tlb.h> #include <asm/irq_regs.h> +#include "sched_cpupri.h" + /* * Convert user-nice values [ -20 ... 0 ... 19 ] * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], @@ -151,7 +153,8 @@ static inline int task_has_rt_policy(struct task_struct *p) */ struct rt_prio_array { DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */ - struct list_head queue[MAX_RT_PRIO]; + struct list_head xqueue[MAX_RT_PRIO]; /* exclusive queue */ + struct list_head squeue[MAX_RT_PRIO]; /* shared queue */ }; struct rt_bandwidth { @@ -289,15 +292,15 @@ struct task_group root_task_group; static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); /* Default task group's cfs_rq on each cpu */ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; -#endif +#endif /* CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_RT_GROUP_SCHED static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; -#endif -#else +#endif /* CONFIG_RT_GROUP_SCHED */ +#else /* !CONFIG_FAIR_GROUP_SCHED */ #define root_task_group init_task_group -#endif +#endif /* CONFIG_FAIR_GROUP_SCHED */ /* task_group_lock serializes add/remove of task groups and also changes to * a task group's cpu shares. @@ -307,9 +310,9 @@ static DEFINE_SPINLOCK(task_group_lock); #ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_USER_SCHED # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) -#else +#else /* !CONFIG_USER_SCHED */ # define INIT_TASK_GROUP_LOAD NICE_0_LOAD -#endif +#endif /* CONFIG_USER_SCHED */ /* * A weight of 0 or 1 can cause arithmetics problems. @@ -452,6 +455,9 @@ struct root_domain { */ cpumask_t rto_mask; atomic_t rto_count; +#ifdef CONFIG_SMP + struct cpupri cpupri; +#endif }; /* @@ -526,6 +532,7 @@ struct rq { int push_cpu; /* cpu of this runqueue: */ int cpu; + int online; struct task_struct *migration_thread; struct list_head migration_queue; @@ -1127,6 +1134,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) return HRTIMER_NORESTART; } +#ifdef CONFIG_SMP static void hotplug_hrtick_disable(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -1182,6 +1190,7 @@ static void init_hrtick(void) { hotcpu_notifier(hotplug_hrtick, 0); } +#endif /* CONFIG_SMP */ static void init_rq_hrtick(struct rq *rq) { @@ -1311,15 +1320,15 @@ void wake_up_idle_cpu(int cpu) if (!tsk_is_polling(rq->idle)) smp_send_reschedule(cpu); } -#endif +#endif /* CONFIG_NO_HZ */ -#else +#else /* !CONFIG_SMP */ static void __resched_task(struct task_struct *p, int tif_bit) { assert_spin_locked(&task_rq(p)->lock); set_tsk_thread_flag(p, tif_bit); } -#endif +#endif /* CONFIG_SMP */ #if BITS_PER_LONG == 32 # define WMULT_CONST (~0UL) @@ -1479,16 +1488,8 @@ static unsigned long source_load(int cpu, int type); static unsigned long target_load(int cpu, int type); static unsigned long cpu_avg_load_per_task(int cpu); static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); -#else /* CONFIG_SMP */ - -#ifdef CONFIG_FAIR_GROUP_SCHED -static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) -{ -} #endif -#endif /* CONFIG_SMP */ - #include "sched_stats.h" #include "sched_idletask.c" #include "sched_fair.c" @@ -1498,6 +1499,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) #endif #define sched_class_highest (&rt_sched_class) +#define for_each_class(class) \ + for (class = sched_class_highest; class; class = class->next) static inline void inc_load(struct rq *rq, const struct task_struct *p) { @@ -1634,12 +1637,6 @@ inline int task_curr(const struct task_struct *p) return cpu_curr(task_cpu(p)) == p; } -/* Used instead of source_load when we know the type == 0 */ -unsigned long weighted_cpuload(const int cpu) -{ - return cpu_rq(cpu)->load.weight; -} - static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) { set_task_rq(p, cpu); @@ -1668,6 +1665,12 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, #ifdef CONFIG_SMP +/* Used instead of source_load when we know the type == 0 */ +static unsigned long weighted_cpuload(const int cpu) +{ + return cpu_rq(cpu)->load.weight; +} + /* * Is this task likely cache-hot: */ @@ -2129,7 +2132,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) } } } -#endif +#endif /* CONFIG_SCHEDSTATS */ out_activate: #endif /* CONFIG_SMP */ @@ -2329,7 +2332,7 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, notifier->ops->sched_out(notifier, next); } -#else +#else /* !CONFIG_PREEMPT_NOTIFIERS */ static void fire_sched_in_preempt_notifiers(struct task_struct *curr) { @@ -2341,7 +2344,7 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, { } -#endif +#endif /* CONFIG_PREEMPT_NOTIFIERS */ /** * prepare_task_switch - prepare to switch tasks @@ -3670,6 +3673,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) /* Earliest time when we have to do rebalance again */ unsigned long next_balance = jiffies + 60*HZ; int update_next_balance = 0; + int need_serialize; cpumask_t tmp; for_each_domain(cpu, sd) { @@ -3687,8 +3691,9 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) if (interval > HZ*NR_CPUS/10) interval = HZ*NR_CPUS/10; + need_serialize = sd->flags & SD_SERIALIZE; - if (sd->flags & SD_SERIALIZE) { + if (need_serialize) { if (!spin_trylock(&balancing)) goto out; } @@ -3704,7 +3709,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) } sd->last_balance = jiffies; } - if (sd->flags & SD_SERIALIZE) + if (need_serialize) spin_unlock(&balancing); out: if (time_after(next_balance, sd->last_balance + interval)) { @@ -4068,6 +4073,7 @@ static noinline void __schedule_bug(struct task_struct *prev) prev->comm, prev->pid, preempt_count()); debug_show_held_locks(prev); + print_modules(); if (irqs_disabled()) print_irqtrace_events(prev); @@ -4141,7 +4147,7 @@ asmlinkage void __sched schedule(void) struct task_struct *prev, *next; unsigned long *switch_count; struct rq *rq; - int cpu; + int cpu, hrtick = sched_feat(HRTICK); need_resched: preempt_disable(); @@ -4156,7 +4162,8 @@ need_resched_nonpreemptible: schedule_debug(prev); - hrtick_clear(rq); + if (hrtick) + hrtick_clear(rq); /* * Do the rq-clock update outside the rq lock: @@ -4202,7 +4209,8 @@ need_resched_nonpreemptible: } else spin_unlock_irq(&rq->lock); - hrtick_set(rq); + if (hrtick) + hrtick_set(rq); if (unlikely(reacquire_kernel_lock(current) < 0)) goto need_resched_nonpreemptible; @@ -5070,24 +5078,6 @@ asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, return sched_setaffinity(pid, &new_mask); } -/* - * Represents all cpu's present in the system - * In systems capable of hotplug, this map could dynamically grow - * as new cpu's are detected in the system via any platform specific - * method, such as ACPI for e.g. - */ - -cpumask_t cpu_present_map __read_mostly; -EXPORT_SYMBOL(cpu_present_map); - -#ifndef CONFIG_SMP -cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; -EXPORT_SYMBOL(cpu_online_map); - -cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; -EXPORT_SYMBOL(cpu_possible_map); -#endif - long sched_getaffinity(pid_t pid, cpumask_t *mask) { struct task_struct *p; @@ -5571,6 +5561,12 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) goto out; } + if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && + !cpus_equal(p->cpus_allowed, *new_mask))) { + ret = -EINVAL; + goto out; + } + if (p->sched_class->set_cpus_allowed) p->sched_class->set_cpus_allowed(p, new_mask); else { @@ -6058,6 +6054,36 @@ static void unregister_sched_domain_sysctl(void) } #endif +static void set_rq_online(struct rq *rq) +{ + if (!rq->online) { + const struct sched_class *class; + + cpu_set(rq->cpu, rq->rd->online); + rq->online = 1; + + for_each_class(class) { + if (class->rq_online) + class->rq_online(rq); + } + } +} + +static void set_rq_offline(struct rq *rq) +{ + if (rq->online) { + const struct sched_class *class; + + for_each_class(class) { + if (class->rq_offline) + class->rq_offline(rq); + } + + cpu_clear(rq->cpu, rq->rd->online); + rq->online = 0; + } +} + /* * migration_call - callback that gets triggered when a CPU is added. * Here we can start up the necessary migration thread for the new CPU. @@ -6095,7 +6121,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { BUG_ON(!cpu_isset(cpu, rq->rd->span)); - cpu_set(cpu, rq->rd->online); + + set_rq_online(rq); } spin_unlock_irqrestore(&rq->lock, flags); break; @@ -6156,7 +6183,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { BUG_ON(!cpu_isset(cpu, rq->rd->span)); - cpu_clear(cpu, rq->rd->online); + set_rq_offline(rq); } spin_unlock_irqrestore(&rq->lock, flags); break; @@ -6190,6 +6217,28 @@ void __init migration_init(void) #ifdef CONFIG_SCHED_DEBUG +static inline const char *sd_level_to_string(enum sched_domain_level lvl) +{ + switch (lvl) { + case SD_LV_NONE: + return "NONE"; + case SD_LV_SIBLING: + return "SIBLING"; + case SD_LV_MC: + return "MC"; + case SD_LV_CPU: + return "CPU"; + case SD_LV_NODE: + return "NODE"; + case SD_LV_ALLNODES: + return "ALLNODES"; + case SD_LV_MAX: + return "MAX"; + + } + return "MAX"; +} + static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpumask_t *groupmask) { @@ -6209,7 +6258,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, return -1; } - printk(KERN_CONT "span %s\n", str); + printk(KERN_CONT "span %s level %s\n", + str, sd_level_to_string(sd->level)); if (!cpu_isset(cpu, sd->span)) { printk(KERN_ERR "ERROR: domain->span does not contain " @@ -6293,9 +6343,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) } kfree(groupmask); } -#else +#else /* !CONFIG_SCHED_DEBUG */ # define sched_domain_debug(sd, cpu) do { } while (0) -#endif +#endif /* CONFIG_SCHED_DEBUG */ static int sd_degenerate(struct sched_domain *sd) { @@ -6355,20 +6405,16 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) static void rq_attach_root(struct rq *rq, struct root_domain *rd) { unsigned long flags; - const struct sched_class *class; spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { struct root_domain *old_rd = rq->rd; - for (class = sched_class_highest; class; class = class->next) { - if (class->leave_domain) - class->leave_domain(rq); - } + if (cpu_isset(rq->cpu, old_rd->online)) + set_rq_offline(rq); cpu_clear(rq->cpu, old_rd->span); - cpu_clear(rq->cpu, old_rd->online); if (atomic_dec_and_test(&old_rd->refcount)) kfree(old_rd); @@ -6379,12 +6425,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) cpu_set(rq->cpu, rd->span); if (cpu_isset(rq->cpu, cpu_online_map)) - cpu_set(rq->cpu, rd->online); - - for (class = sched_class_highest; class; class = class->next) { - if (class->join_domain) - class->join_domain(rq); - } + set_rq_online(rq); spin_unlock_irqrestore(&rq->lock, flags); } @@ -6395,6 +6436,8 @@ static void init_rootdomain(struct root_domain *rd) cpus_clear(rd->span); cpus_clear(rd->online); + + cpupri_init(&rd->cpupri); } static void init_defrootdomain(void) @@ -6589,7 +6632,7 @@ static void sched_domain_node_span(int node, cpumask_t *span) cpus_or(*span, *span, *nodemask); } } -#endif +#endif /* CONFIG_NUMA */ int sched_smt_power_savings = 0, sched_mc_power_savings = 0; @@ -6608,7 +6651,7 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, *sg = &per_cpu(sched_group_cpus, cpu); return cpu; } -#endif +#endif /* CONFIG_SCHED_SMT */ /* * multi-core sched-domains: @@ -6616,7 +6659,7 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, #ifdef CONFIG_SCHED_MC static DEFINE_PER_CPU(struct sched_domain, core_domains); static DEFINE_PER_CPU(struct sched_group, sched_group_core); -#endif +#endif /* CONFIG_SCHED_MC */ #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) static int @@ -6718,7 +6761,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) sg = sg->next; } while (sg != group_head); } -#endif +#endif /* CONFIG_NUMA */ #ifdef CONFIG_NUMA /* Free memory allocated for various sched_group structures */ @@ -6755,11 +6798,11 @@ next_sg: sched_group_nodes_bycpu[cpu] = NULL; } } -#else +#else /* !CONFIG_NUMA */ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) { } -#endif +#endif /* CONFIG_NUMA */ /* * Initialize sched groups cpu_power. @@ -7236,6 +7279,18 @@ void __attribute__((weak)) arch_update_cpu_topology(void) } /* + * Free current domain masks. + * Called after all cpus are attached to NULL domain. + */ +static void free_sched_domains(void) +{ + ndoms_cur = 0; + if (doms_cur != &fallback_doms) + kfree(doms_cur); + doms_cur = &fallback_doms; +} + +/* * Set up scheduler domains and groups. Callers must hold the hotplug lock. * For now this just excludes isolated cpus, but could be used to * exclude other special cases in the future. @@ -7382,6 +7437,7 @@ int arch_reinit_sched_domains(void) get_online_cpus(); mutex_lock(&sched_domains_mutex); detach_destroy_domains(&cpu_online_map); + free_sched_domains(); err = arch_init_sched_domains(&cpu_online_map); mutex_unlock(&sched_domains_mutex); put_online_cpus(); @@ -7450,7 +7506,7 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) #endif return err; } -#endif +#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ /* * Force a reinitialization of the sched domains hierarchy. The domains @@ -7461,20 +7517,28 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) static int update_sched_domains(struct notifier_block *nfb, unsigned long action, void *hcpu) { + int cpu = (int)(long)hcpu; + switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: + disable_runtime(cpu_rq(cpu)); + /* fall-through */ + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: detach_destroy_domains(&cpu_online_map); + free_sched_domains(); return NOTIFY_OK; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: + case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: case CPU_ONLINE: case CPU_ONLINE_FROZEN: + enable_runtime(cpu_rq(cpu)); + /* fall-through */ + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: /* @@ -7485,8 +7549,16 @@ static int update_sched_domains(struct notifier_block *nfb, return NOTIFY_DONE; } +#ifndef CONFIG_CPUSETS + /* + * Create default domain partitioning if cpusets are disabled. + * Otherwise we let cpusets rebuild the domains based on the + * current setup. + */ + /* The hotplug lock is already held by cpu_up/cpu_down */ arch_init_sched_domains(&cpu_online_map); +#endif return NOTIFY_OK; } @@ -7548,7 +7620,8 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) array = &rt_rq->active; for (i = 0; i < MAX_RT_PRIO; i++) { - INIT_LIST_HEAD(array->queue + i); + INIT_LIST_HEAD(array->xqueue + i); + INIT_LIST_HEAD(array->squeue + i); __clear_bit(i, array->bitmap); } /* delimiter for bitsearch: */ @@ -7667,8 +7740,8 @@ void __init sched_init(void) root_task_group.cfs_rq = (struct cfs_rq **)ptr; ptr += nr_cpu_ids * sizeof(void **); -#endif -#endif +#endif /* CONFIG_USER_SCHED */ +#endif /* CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_RT_GROUP_SCHED init_task_group.rt_se = (struct sched_rt_entity **)ptr; ptr += nr_cpu_ids * sizeof(void **); @@ -7682,8 +7755,8 @@ void __init sched_init(void) root_task_group.rt_rq = (struct rt_rq **)ptr; ptr += nr_cpu_ids * sizeof(void **); -#endif -#endif +#endif /* CONFIG_USER_SCHED */ +#endif /* CONFIG_RT_GROUP_SCHED */ } #ifdef CONFIG_SMP @@ -7699,8 +7772,8 @@ void __init sched_init(void) #ifdef CONFIG_USER_SCHED init_rt_bandwidth(&root_task_group.rt_bandwidth, global_rt_period(), RUNTIME_INF); -#endif -#endif +#endif /* CONFIG_USER_SCHED */ +#endif /* CONFIG_RT_GROUP_SCHED */ #ifdef CONFIG_GROUP_SCHED list_add(&init_task_group.list, &task_groups); @@ -7710,8 +7783,8 @@ void __init sched_init(void) INIT_LIST_HEAD(&root_task_group.children); init_task_group.parent = &root_task_group; list_add(&init_task_group.siblings, &root_task_group.children); -#endif -#endif +#endif /* CONFIG_USER_SCHED */ +#endif /* CONFIG_GROUP_SCHED */ for_each_possible_cpu(i) { struct rq *rq; @@ -7791,6 +7864,7 @@ void __init sched_init(void) rq->next_balance = jiffies; rq->push_cpu = 0; rq->cpu = i; + rq->online = 0; rq->migration_thread = NULL; INIT_LIST_HEAD(&rq->migration_queue); rq_attach_root(rq, &def_root_domain); @@ -8030,7 +8104,7 @@ static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) { list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); } -#else +#else /* !CONFG_FAIR_GROUP_SCHED */ static inline void free_fair_sched_group(struct task_group *tg) { } @@ -8048,7 +8122,7 @@ static inline void register_fair_sched_group(struct task_group *tg, int cpu) static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) { } -#endif +#endif /* CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_RT_GROUP_SCHED static void free_rt_sched_group(struct task_group *tg) @@ -8119,7 +8193,7 @@ static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) { list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); } -#else +#else /* !CONFIG_RT_GROUP_SCHED */ static inline void free_rt_sched_group(struct task_group *tg) { } @@ -8137,7 +8211,7 @@ static inline void register_rt_sched_group(struct task_group *tg, int cpu) static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) { } -#endif +#endif /* CONFIG_RT_GROUP_SCHED */ #ifdef CONFIG_GROUP_SCHED static void free_sched_group(struct task_group *tg) @@ -8248,7 +8322,7 @@ void sched_move_task(struct task_struct *tsk) task_rq_unlock(rq, &flags); } -#endif +#endif /* CONFIG_GROUP_SCHED */ #ifdef CONFIG_FAIR_GROUP_SCHED static void set_se_shares(struct sched_entity *se, unsigned long shares) @@ -8498,7 +8572,7 @@ static int sched_rt_global_constraints(void) return ret; } -#else +#else /* !CONFIG_RT_GROUP_SCHED */ static int sched_rt_global_constraints(void) { unsigned long flags; @@ -8516,7 +8590,7 @@ static int sched_rt_global_constraints(void) return 0; } -#endif +#endif /* CONFIG_RT_GROUP_SCHED */ int sched_rt_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, @@ -8624,7 +8698,7 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) return (u64) tg->shares; } -#endif +#endif /* CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_RT_GROUP_SCHED static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, @@ -8648,7 +8722,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft) { return sched_group_rt_period(cgroup_tg(cgrp)); } -#endif +#endif /* CONFIG_RT_GROUP_SCHED */ static struct cftype cpu_files[] = { #ifdef CONFIG_FAIR_GROUP_SCHED |