diff options
Diffstat (limited to 'kernel/time')
-rw-r--r-- | kernel/time/Kconfig | 51 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 61 | ||||
-rw-r--r-- | kernel/time/timer_list.c | 41 |
3 files changed, 103 insertions, 50 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 70f27e89012..2b62fe86f9e 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -105,7 +105,6 @@ config NO_HZ_FULL select RCU_USER_QS select RCU_NOCB_CPU select VIRT_CPU_ACCOUNTING_GEN - select CONTEXT_TRACKING_FORCE select IRQ_WORK help Adaptively try to shutdown the tick whenever possible, even when @@ -134,6 +133,56 @@ config NO_HZ_FULL_ALL Note the boot CPU will still be kept outside the range to handle the timekeeping duty. +config NO_HZ_FULL_SYSIDLE + bool "Detect full-system idle state for full dynticks system" + depends on NO_HZ_FULL + default n + help + At least one CPU must keep the scheduling-clock tick running for + timekeeping purposes whenever there is a non-idle CPU, where + "non-idle" also includes dynticks CPUs as long as they are + running non-idle tasks. Because the underlying adaptive-tick + support cannot distinguish between all CPUs being idle and + all CPUs each running a single task in dynticks mode, the + underlying support simply ensures that there is always a CPU + handling the scheduling-clock tick, whether or not all CPUs + are idle. This Kconfig option enables scalable detection of + the all-CPUs-idle state, thus allowing the scheduling-clock + tick to be disabled when all CPUs are idle. Note that scalable + detection of the all-CPUs-idle state means that larger systems + will be slower to declare the all-CPUs-idle state. + + Say Y if you would like to help debug all-CPUs-idle detection. + + Say N if you are unsure. + +config NO_HZ_FULL_SYSIDLE_SMALL + int "Number of CPUs above which large-system approach is used" + depends on NO_HZ_FULL_SYSIDLE + range 1 NR_CPUS + default 8 + help + The full-system idle detection mechanism takes a lazy approach + on large systems, as is required to attain decent scalability. + However, on smaller systems, scalability is not anywhere near as + large a concern as is energy efficiency. The sysidle subsystem + therefore uses a fast but non-scalable algorithm for small + systems and a lazier but scalable algorithm for large systems. + This Kconfig parameter defines the number of CPUs in the largest + system that will be considered to be "small". + + The default value will be fine in most cases. Battery-powered + systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger + numbers of CPUs, and (3) are suffering from battery-lifetime + problems due to long sysidle latencies might wish to experiment + with larger values for this Kconfig parameter. On the other + hand, they might be even better served by disabling NO_HZ_FULL + entirely, given that NO_HZ_FULL is intended for HPC and + real-time workloads that at present do not tend to be run on + battery-powered systems. + + Take the default if you are unsure. + config NO_HZ bool "Old Idle dynticks config" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e8a1516cc0a..3612fc77f83 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -23,6 +23,7 @@ #include <linux/irq_work.h> #include <linux/posix-timers.h> #include <linux/perf_event.h> +#include <linux/context_tracking.h> #include <asm/irq_regs.h> @@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) } #ifdef CONFIG_NO_HZ_FULL -static cpumask_var_t nohz_full_mask; -bool have_nohz_full_mask; +cpumask_var_t tick_nohz_full_mask; +bool tick_nohz_full_running; static bool can_stop_full_tick(void) { @@ -182,7 +183,7 @@ static bool can_stop_full_tick(void) * Don't allow the user to think they can get * full NO_HZ with this machine. */ - WARN_ONCE(have_nohz_full_mask, + WARN_ONCE(tick_nohz_full_running, "NO_HZ FULL will not work with unstable sched clock"); return false; } @@ -197,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); * Re-evaluate the need for the tick on the current CPU * and restart it if necessary. */ -void tick_nohz_full_check(void) +void __tick_nohz_full_check(void) { struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); @@ -211,7 +212,7 @@ void tick_nohz_full_check(void) static void nohz_full_kick_work_func(struct irq_work *work) { - tick_nohz_full_check(); + __tick_nohz_full_check(); } static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { @@ -230,7 +231,7 @@ void tick_nohz_full_kick(void) static void nohz_full_kick_ipi(void *info) { - tick_nohz_full_check(); + __tick_nohz_full_check(); } /* @@ -239,12 +240,13 @@ static void nohz_full_kick_ipi(void *info) */ void tick_nohz_full_kick_all(void) { - if (!have_nohz_full_mask) + if (!tick_nohz_full_running) return; preempt_disable(); - smp_call_function_many(nohz_full_mask, + smp_call_function_many(tick_nohz_full_mask, nohz_full_kick_ipi, NULL, false); + tick_nohz_full_kick(); preempt_enable(); } @@ -253,7 +255,7 @@ void tick_nohz_full_kick_all(void) * It might need the tick due to per task/process properties: * perf events, posix cpu timers, ... */ -void tick_nohz_task_switch(struct task_struct *tsk) +void __tick_nohz_task_switch(struct task_struct *tsk) { unsigned long flags; @@ -269,31 +271,23 @@ out: local_irq_restore(flags); } -int tick_nohz_full_cpu(int cpu) -{ - if (!have_nohz_full_mask) - return 0; - - return cpumask_test_cpu(cpu, nohz_full_mask); -} - /* Parse the boot-time nohz CPU list from the kernel parameters. */ static int __init tick_nohz_full_setup(char *str) { int cpu; - alloc_bootmem_cpumask_var(&nohz_full_mask); - if (cpulist_parse(str, nohz_full_mask) < 0) { + alloc_bootmem_cpumask_var(&tick_nohz_full_mask); + if (cpulist_parse(str, tick_nohz_full_mask) < 0) { pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); return 1; } cpu = smp_processor_id(); - if (cpumask_test_cpu(cpu, nohz_full_mask)) { + if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); - cpumask_clear_cpu(cpu, nohz_full_mask); + cpumask_clear_cpu(cpu, tick_nohz_full_mask); } - have_nohz_full_mask = true; + tick_nohz_full_running = true; return 1; } @@ -311,7 +305,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, * If we handle the timekeeping duty for full dynticks CPUs, * we can't safely shutdown that CPU. */ - if (have_nohz_full_mask && tick_do_timer_cpu == cpu) + if (tick_nohz_full_running && tick_do_timer_cpu == cpu) return NOTIFY_BAD; break; } @@ -330,31 +324,34 @@ static int tick_nohz_init_all(void) int err = -1; #ifdef CONFIG_NO_HZ_FULL_ALL - if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { + if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) { pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); return err; } err = 0; - cpumask_setall(nohz_full_mask); - cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); - have_nohz_full_mask = true; + cpumask_setall(tick_nohz_full_mask); + cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask); + tick_nohz_full_running = true; #endif return err; } void __init tick_nohz_init(void) { - if (!have_nohz_full_mask) { + int cpu; + + if (!tick_nohz_full_running) { if (tick_nohz_init_all() < 0) return; } + for_each_cpu(cpu, tick_nohz_full_mask) + context_tracking_cpu_set(cpu); + cpu_notifier(tick_nohz_cpu_down_callback, 0); - cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); + cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask); pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); } -#else -#define have_nohz_full_mask (0) #endif /* @@ -732,7 +729,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) return false; } - if (have_nohz_full_mask) { + if (tick_nohz_full_enabled()) { /* * Keep the tick alive to guarantee timekeeping progression * if there are full dynticks CPUs around diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 3bdf2832301..61ed862cdd3 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -265,10 +265,9 @@ static inline void timer_list_header(struct seq_file *m, u64 now) static int timer_list_show(struct seq_file *m, void *v) { struct timer_list_iter *iter = v; - u64 now = ktime_to_ns(ktime_get()); if (iter->cpu == -1 && !iter->second_pass) - timer_list_header(m, now); + timer_list_header(m, iter->now); else if (!iter->second_pass) print_cpu(m, iter->cpu, iter->now); #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -298,33 +297,41 @@ void sysrq_timer_list_show(void) return; } -static void *timer_list_start(struct seq_file *file, loff_t *offset) +static void *move_iter(struct timer_list_iter *iter, loff_t offset) { - struct timer_list_iter *iter = file->private; - - if (!*offset) { - iter->cpu = -1; - iter->now = ktime_to_ns(ktime_get()); - } else if (iter->cpu >= nr_cpu_ids) { + for (; offset; offset--) { + iter->cpu = cpumask_next(iter->cpu, cpu_online_mask); + if (iter->cpu >= nr_cpu_ids) { #ifdef CONFIG_GENERIC_CLOCKEVENTS - if (!iter->second_pass) { - iter->cpu = -1; - iter->second_pass = true; - } else - return NULL; + if (!iter->second_pass) { + iter->cpu = -1; + iter->second_pass = true; + } else + return NULL; #else - return NULL; + return NULL; #endif + } } return iter; } +static void *timer_list_start(struct seq_file *file, loff_t *offset) +{ + struct timer_list_iter *iter = file->private; + + if (!*offset) + iter->now = ktime_to_ns(ktime_get()); + iter->cpu = -1; + iter->second_pass = false; + return move_iter(iter, *offset); +} + static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset) { struct timer_list_iter *iter = file->private; - iter->cpu = cpumask_next(iter->cpu, cpu_online_mask); ++*offset; - return timer_list_start(file, offset); + return move_iter(iter, 1); } static void timer_list_stop(struct seq_file *seq, void *v) |