From 1c20091e77fc5a9b7d7d905176443b4822a23cdb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Aug 2011 23:21:01 +0200 Subject: nohz: Wake up full dynticks CPUs when a timer gets enqueued Wake up a CPU when a timer list timer is enqueued there and the target is part of the full dynticks range. Sending an IPI to it makes it reconsidering the next timer to program on top of recent updates. This may later be improved by checking if the tick is really stopped on the target. This would need some careful synchronization though. So deal with such optimization later and start simple. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/timer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index dbf7a78a1ef..4e3040b40d1 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -930,14 +930,14 @@ void add_timer_on(struct timer_list *timer, int cpu) debug_activate(timer, timer->expires); internal_add_timer(base, timer); /* - * Check whether the other CPU is idle and needs to be - * triggered to reevaluate the timer wheel when nohz is - * active. We are protected against the other CPU fiddling + * Check whether the other CPU is in dynticks mode and needs + * to be triggered to reevaluate the timer wheel. + * We are protected against the other CPU fiddling * with the timer by holding the timer base lock. This also - * makes sure that a CPU on the way to idle can not evaluate - * the timer wheel. + * makes sure that a CPU on the way to stop its tick can not + * evaluate the timer wheel. */ - wake_up_idle_cpu(cpu); + wake_up_nohz_cpu(cpu); spin_unlock_irqrestore(&base->lock, flags); } EXPORT_SYMBOL_GPL(add_timer_on); -- cgit v1.2.3-70-g09d2 From 3451d0243c3cdfd729b36f9684a14659d4895ca3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Aug 2011 23:21:01 +0200 Subject: nohz: Rename CONFIG_NO_HZ to CONFIG_NO_HZ_COMMON We are planning to convert the dynticks Kconfig options layout into a choice menu. The user must be able to easily pick any of the following implementations: constant periodic tick, idle dynticks, full dynticks. As this implies a mutual exclusion, the two dynticks implementions need to converge on the selection of a common Kconfig option in order to ease the sharing of a common infrastructure. It would thus seem pretty natural to reuse CONFIG_NO_HZ to that end. It already implements all the idle dynticks code and the full dynticks depends on all that code for now. So ideally the choice menu would propose CONFIG_NO_HZ_IDLE and CONFIG_NO_HZ_EXTENDED then both would select CONFIG_NO_HZ. On the other hand we want to stay backward compatible: if CONFIG_NO_HZ is set in an older config file, we want to enable CONFIG_NO_HZ_IDLE by default. But we can't afford both at the same time or we run into a circular dependency: 1) CONFIG_NO_HZ_IDLE and CONFIG_NO_HZ_EXTENDED both select CONFIG_NO_HZ 2) If CONFIG_NO_HZ is set, we default to CONFIG_NO_HZ_IDLE We might be able to support that from Kconfig/Kbuild but it may not be wise to introduce such a confusing behaviour. So to solve this, create a new CONFIG_NO_HZ_COMMON option which gathers the common code between idle and full dynticks (that common code for now is simply the idle dynticks code) and select it from their referring Kconfig. Then we'll later create CONFIG_NO_HZ_IDLE and map CONFIG_NO_HZ to it for backward compatibility. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- Documentation/RCU/stallwarn.txt | 2 +- Documentation/cpu-freq/governors.txt | 4 ++-- arch/um/include/shared/common-offsets.h | 4 ++-- arch/um/os-Linux/time.c | 2 +- include/linux/sched.h | 8 ++++---- include/linux/tick.h | 8 ++++---- init/Kconfig | 2 +- kernel/hrtimer.c | 4 ++-- kernel/sched/core.c | 18 +++++++++--------- kernel/sched/fair.c | 10 +++++----- kernel/sched/sched.h | 4 ++-- kernel/softirq.c | 2 +- kernel/time/Kconfig | 13 +++++++++---- kernel/time/tick-sched.c | 12 ++++++------ kernel/timer.c | 4 ++-- 15 files changed, 51 insertions(+), 46 deletions(-) (limited to 'kernel/timer.c') diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 1927151b386..b336755b71e 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -176,7 +176,7 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that o A hardware or software issue shuts off the scheduler-clock interrupt on a CPU that is not in dyntick-idle mode. This problem really has happened, and seems to be most likely to - result in RCU CPU stall warnings for CONFIG_NO_HZ=n kernels. + result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels. o A bug in the RCU implementation. diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index c7a2eb8450c..e3e5d9ae50c 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt @@ -131,8 +131,8 @@ sampling_rate_min: The sampling rate is limited by the HW transition latency: transition_latency * 100 Or by kernel restrictions: -If CONFIG_NO_HZ is set, the limit is 10ms fixed. -If CONFIG_NO_HZ is not set or nohz=off boot parameter is used, the +If CONFIG_NO_HZ_COMMON is set, the limit is 10ms fixed. +If CONFIG_NO_HZ_COMMON is not set or nohz=off boot parameter is used, the limits depend on the CONFIG_HZ option: HZ=1000: min=20000us (20ms) HZ=250: min=80000us (80ms) diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 2df313b6a58..c9230680902 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -30,8 +30,8 @@ DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); #ifdef CONFIG_PRINTK DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK); #endif -#ifdef CONFIG_NO_HZ -DEFINE(UML_CONFIG_NO_HZ, CONFIG_NO_HZ); +#ifdef CONFIG_NO_HZ_COMMON +DEFINE(UML_CONFIG_NO_HZ_COMMON, CONFIG_NO_HZ_COMMON); #endif #ifdef CONFIG_UML_X86 DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86); diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index fac388cb464..e9824d5dd7d 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -79,7 +79,7 @@ long long os_nsecs(void) return timeval_to_ns(&tv); } -#ifdef UML_CONFIG_NO_HZ +#ifdef UML_CONFIG_NO_HZ_COMMON static int after_sleep_interval(struct timespec *ts) { return 0; diff --git a/include/linux/sched.h b/include/linux/sched.h index 10626e2ee68..1ff9e0a5de2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -230,7 +230,7 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(int cpu); -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); @@ -1758,13 +1758,13 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON void calc_load_enter_idle(void); void calc_load_exit_idle(void); #else static inline void calc_load_enter_idle(void) { } static inline void calc_load_exit_idle(void) { } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ #ifndef CONFIG_CPUMASK_OFFSTACK static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) @@ -1850,7 +1850,7 @@ extern void idle_task_exit(void); static inline void idle_task_exit(void) {} #endif -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) extern void wake_up_nohz_cpu(int cpu); #else static inline void wake_up_nohz_cpu(int cpu) { } diff --git a/include/linux/tick.h b/include/linux/tick.h index 44bfa8aa439..5e403339ee1 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -82,7 +82,7 @@ extern int tick_program_event(ktime_t expires, int force); extern void tick_setup_sched_timer(void); # endif -# if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS extern void tick_cancel_sched_timer(int cpu); # else static inline void tick_cancel_sched_timer(int cpu) { } @@ -123,7 +123,7 @@ static inline void tick_check_idle(int cpu) { } static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -# ifdef CONFIG_NO_HZ +# ifdef CONFIG_NO_HZ_COMMON DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched); static inline int tick_nohz_tick_stopped(void) @@ -138,7 +138,7 @@ extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); -# else /* !CONFIG_NO_HZ */ +# else /* !CONFIG_NO_HZ_COMMON */ static inline int tick_nohz_tick_stopped(void) { return 0; @@ -155,7 +155,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } -# endif /* !NO_HZ */ +# endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_EXTENDED extern int tick_nohz_extended_cpu(int cpu); diff --git a/init/Kconfig b/init/Kconfig index 8a1dac2f80a..edc8132584f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -580,7 +580,7 @@ config RCU_FANOUT_EXACT config RCU_FAST_NO_HZ bool "Accelerate last non-dyntick-idle CPU's grace periods" - depends on NO_HZ && SMP + depends on NO_HZ_COMMON && SMP default n help This option causes RCU to attempt to accelerate grace periods in diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index cc47812d3fe..ec60482d8b0 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -160,7 +160,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, */ static int hrtimer_get_target(int this_cpu, int pinned) { -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) return get_nohz_timer_target(); #endif @@ -1106,7 +1106,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) } EXPORT_SYMBOL_GPL(hrtimer_get_remaining); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /** * hrtimer_get_next_event - get the time until next expiry event * diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e91ee589f79..9bb397da63d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -549,7 +549,7 @@ void resched_cpu(int cpu) raw_spin_unlock_irqrestore(&rq->lock, flags); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * In the semi idle case, use the nearest busy cpu for migrating timers * from an idle cpu. This is good for power-savings. @@ -641,14 +641,14 @@ static inline bool got_nohz_idle_kick(void) return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)); } -#else /* CONFIG_NO_HZ */ +#else /* CONFIG_NO_HZ_COMMON */ static inline bool got_nohz_idle_kick(void) { return false; } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ void sched_avg_update(struct rq *rq) { @@ -2139,7 +2139,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) return load >> FSHIFT; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Handle NO_HZ for the global load-average. * @@ -2365,12 +2365,12 @@ static void calc_global_nohz(void) smp_wmb(); calc_load_idx++; } -#else /* !CONFIG_NO_HZ */ +#else /* !CONFIG_NO_HZ_COMMON */ static inline long calc_load_fold_idle(void) { return 0; } static inline void calc_global_nohz(void) { } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * calc_load - update the avenrun load estimates 10 ticks after the @@ -2530,7 +2530,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, sched_avg_update(this_rq); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * There is no sane way to deal with nohz on smp when using jiffies because the * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading @@ -2590,7 +2590,7 @@ void update_cpu_load_nohz(void) } raw_spin_unlock(&this_rq->lock); } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from scheduler_tick() @@ -7023,7 +7023,7 @@ void __init sched_init(void) INIT_LIST_HEAD(&rq->cfs_tasks); rq_attach_root(rq, &def_root_domain); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON rq->nohz_flags = 0; #endif #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 539760ef00c..5c97fca091a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5331,7 +5331,7 @@ out_unlock: return 0; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * idle load balancing details * - When one of the busy CPUs notice that there may be an idle rebalancing @@ -5541,9 +5541,9 @@ out: rq->next_balance = next_balance; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* - * In CONFIG_NO_HZ case, the idle balance kickee will do the + * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the * rebalancing for all the cpus for whom scheduler ticks are stopped. */ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) @@ -5686,7 +5686,7 @@ void trigger_load_balance(struct rq *rq, int cpu) if (time_after_eq(jiffies, rq->next_balance) && likely(!on_null_domain(cpu))) raise_softirq(SCHED_SOFTIRQ); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu))) nohz_balancer_kick(cpu); #endif @@ -6156,7 +6156,7 @@ __init void init_sched_fair_class(void) #ifdef CONFIG_SMP open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON nohz.next_balance = jiffies; zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); cpu_notifier(sched_ilb_notifier, 0); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3bd15a43eeb..889904dd6d7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -404,7 +404,7 @@ struct rq { #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; unsigned long last_load_update_tick; -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON u64 nohz_stamp; unsigned long nohz_flags; #endif @@ -1333,7 +1333,7 @@ extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); extern void account_cfs_bandwidth_used(int enabled, int was_enabled); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON enum rq_nohz_flag_bits { NOHZ_TICK_STOPPED, NOHZ_BALANCE_KICK, diff --git a/kernel/softirq.c b/kernel/softirq.c index b4d252fd195..de15813f2a6 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -348,7 +348,7 @@ void irq_exit(void) if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* Make sure that timer wheel updates are propagated */ if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) tick_nohz_irq_exit(); diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 726c33e00da..c88fc43494c 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -64,16 +64,21 @@ config GENERIC_CMOS_UPDATE if GENERIC_CLOCKEVENTS menu "Timers subsystem" -# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is +# Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is # only related to the tick functionality. Oneshot clockevent devices # are supported independ of this. config TICK_ONESHOT bool +config NO_HZ_COMMON + bool + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS + select TICK_ONESHOT + config NO_HZ bool "Tickless System (Dynamic Ticks)" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - select TICK_ONESHOT + select NO_HZ_COMMON help This option enables a tickless system: timer interrupts will only trigger on an as-needed basis both when the system is @@ -81,14 +86,14 @@ config NO_HZ config NO_HZ_EXTENDED bool "Full dynticks system" - # NO_HZ dependency + # NO_HZ_COMMON dependency depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS # RCU_USER_QS depends on HAVE_CONTEXT_TRACKING && SMP # RCU_NOCB_CPU dependency depends on TREE_RCU || TREE_PREEMPT_RCU depends on VIRT_CPU_ACCOUNTING_GEN - select NO_HZ + select NO_HZ_COMMON select RCU_USER_QS select RCU_NOCB_CPU select CONTEXT_TRACKING_FORCE diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 57bb3fe5aaa..ccfc2086cd4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -104,7 +104,7 @@ static void tick_sched_do_timer(ktime_t now) { int cpu = smp_processor_id(); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Check if the do_timer duty was dropped. We don't care about * concurrency: This happens only when the cpu in charge went @@ -124,7 +124,7 @@ static void tick_sched_do_timer(ktime_t now) static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) { -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * When we are idle and the tick is stopped, we have to touch * the watchdog as we might not schedule for a really long @@ -235,7 +235,7 @@ core_initcall(init_tick_nohz_extended); /* * NOHZ - aka dynamic tick functionality */ -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * NO HZ enabled ? */ @@ -907,7 +907,7 @@ static inline void tick_check_nohz(int cpu) static inline void tick_nohz_switch_to_nohz(void) { } static inline void tick_check_nohz(int cpu) { } -#endif /* NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from irq_enter to notify about the possible interruption of idle() @@ -992,14 +992,14 @@ void tick_setup_sched_timer(void) now = ktime_get(); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (tick_nohz_enabled) ts->nohz_mode = NOHZ_MODE_HIGHRES; #endif } #endif /* HIGH_RES_TIMERS */ -#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); diff --git a/kernel/timer.c b/kernel/timer.c index 4e3040b40d1..1b7489fdea4 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -738,7 +738,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, cpu = smp_processor_id(); -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) cpu = get_nohz_timer_target(); #endif @@ -1188,7 +1188,7 @@ static inline void __run_timers(struct tvec_base *base) spin_unlock_irq(&base->lock); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Find out when the next timer event is due to happen. This * is used on S/390 to stop all activity when a CPU is idle. -- cgit v1.2.3-70-g09d2 From 1a0df59444972105f0d4c2b0c16ce414d70c420a Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 30 Apr 2013 15:27:34 -0700 Subject: kernel/compat.c: make do_sysinfo() static The only use outside of kernel/timer.c was in kernel/compat.c, so move compat_sys_sysinfo() next to sys_sysinfo() in kernel/timer.c. Signed-off-by: Stephen Rothwell Cc: Thomas Gleixner Cc: Guenter Roeck Cc: Al Viro Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 -- kernel/compat.c | 65 ---------------------------------------------- kernel/timer.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 69 insertions(+), 68 deletions(-) (limited to 'kernel/timer.c') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2dac79c3919..6d1844f393c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -798,6 +798,4 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD #endif -extern int do_sysinfo(struct sysinfo *info); - #endif diff --git a/kernel/compat.c b/kernel/compat.c index 19971d8c729..1e8f1455117 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -1138,71 +1138,6 @@ asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, } #endif -struct compat_sysinfo { - s32 uptime; - u32 loads[3]; - u32 totalram; - u32 freeram; - u32 sharedram; - u32 bufferram; - u32 totalswap; - u32 freeswap; - u16 procs; - u16 pad; - u32 totalhigh; - u32 freehigh; - u32 mem_unit; - char _f[20-2*sizeof(u32)-sizeof(int)]; -}; - -asmlinkage long -compat_sys_sysinfo(struct compat_sysinfo __user *info) -{ - struct sysinfo s; - - do_sysinfo(&s); - - /* Check to see if any memory value is too large for 32-bit and scale - * down if needed - */ - if ((s.totalram >> 32) || (s.totalswap >> 32)) { - int bitcount = 0; - - while (s.mem_unit < PAGE_SIZE) { - s.mem_unit <<= 1; - bitcount++; - } - - s.totalram >>= bitcount; - s.freeram >>= bitcount; - s.sharedram >>= bitcount; - s.bufferram >>= bitcount; - s.totalswap >>= bitcount; - s.freeswap >>= bitcount; - s.totalhigh >>= bitcount; - s.freehigh >>= bitcount; - } - - if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || - __put_user (s.uptime, &info->uptime) || - __put_user (s.loads[0], &info->loads[0]) || - __put_user (s.loads[1], &info->loads[1]) || - __put_user (s.loads[2], &info->loads[2]) || - __put_user (s.totalram, &info->totalram) || - __put_user (s.freeram, &info->freeram) || - __put_user (s.sharedram, &info->sharedram) || - __put_user (s.bufferram, &info->bufferram) || - __put_user (s.totalswap, &info->totalswap) || - __put_user (s.freeswap, &info->freeswap) || - __put_user (s.procs, &info->procs) || - __put_user (s.totalhigh, &info->totalhigh) || - __put_user (s.freehigh, &info->freehigh) || - __put_user (s.mem_unit, &info->mem_unit)) - return -EFAULT; - - return 0; -} - COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval, compat_pid_t, pid, struct compat_timespec __user *, interval) diff --git a/kernel/timer.c b/kernel/timer.c index dbf7a78a1ef..06b3245624e 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -1567,7 +1568,7 @@ SYSCALL_DEFINE0(gettid) * do_sysinfo - fill in sysinfo struct * @info: pointer to buffer to fill */ -int do_sysinfo(struct sysinfo *info) +static int do_sysinfo(struct sysinfo *info) { unsigned long mem_total, sav_total; unsigned int mem_unit, bitcount; @@ -1642,6 +1643,73 @@ SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) return 0; } +#ifdef CONFIG_COMPAT +struct compat_sysinfo { + s32 uptime; + u32 loads[3]; + u32 totalram; + u32 freeram; + u32 sharedram; + u32 bufferram; + u32 totalswap; + u32 freeswap; + u16 procs; + u16 pad; + u32 totalhigh; + u32 freehigh; + u32 mem_unit; + char _f[20-2*sizeof(u32)-sizeof(int)]; +}; + +asmlinkage long +compat_sys_sysinfo(struct compat_sysinfo __user *info) +{ + struct sysinfo s; + + do_sysinfo(&s); + + /* Check to see if any memory value is too large for 32-bit and scale + * down if needed + */ + if ((s.totalram >> 32) || (s.totalswap >> 32)) { + int bitcount = 0; + + while (s.mem_unit < PAGE_SIZE) { + s.mem_unit <<= 1; + bitcount++; + } + + s.totalram >>= bitcount; + s.freeram >>= bitcount; + s.sharedram >>= bitcount; + s.bufferram >>= bitcount; + s.totalswap >>= bitcount; + s.freeswap >>= bitcount; + s.totalhigh >>= bitcount; + s.freehigh >>= bitcount; + } + + if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || + __put_user (s.uptime, &info->uptime) || + __put_user (s.loads[0], &info->loads[0]) || + __put_user (s.loads[1], &info->loads[1]) || + __put_user (s.loads[2], &info->loads[2]) || + __put_user (s.totalram, &info->totalram) || + __put_user (s.freeram, &info->freeram) || + __put_user (s.sharedram, &info->sharedram) || + __put_user (s.bufferram, &info->bufferram) || + __put_user (s.totalswap, &info->totalswap) || + __put_user (s.freeswap, &info->freeswap) || + __put_user (s.procs, &info->procs) || + __put_user (s.totalhigh, &info->totalhigh) || + __put_user (s.freehigh, &info->freehigh) || + __put_user (s.mem_unit, &info->mem_unit)) + return -EFAULT; + + return 0; +} +#endif /* CONFIG_COMPAT */ + static int __cpuinit init_timers_cpu(int cpu) { int j; -- cgit v1.2.3-70-g09d2 From 1043f65a573b65a5398925551583ea72092e1be2 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 30 Apr 2013 15:27:35 -0700 Subject: kernel/timer.c: convert compat_sys_sysinfo to COMPAT_SYSCALL_DEFINE Signed-off-by: Stephen Rothwell Cc: Thomas Gleixner Cc: Guenter Roeck Cc: Al Viro Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/timer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index 06b3245624e..f0e65885b82 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1661,8 +1661,7 @@ struct compat_sysinfo { char _f[20-2*sizeof(u32)-sizeof(int)]; }; -asmlinkage long -compat_sys_sysinfo(struct compat_sysinfo __user *info) +COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) { struct sysinfo s; -- cgit v1.2.3-70-g09d2 From 4a22f16636259f503847b0805e04824171b270fc Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 30 Apr 2013 15:27:37 -0700 Subject: kernel/timer.c: move some non timer related syscalls to kernel/sys.c Andrew Morton noted: akpm3:/usr/src/25> grep SYSCALL kernel/timer.c SYSCALL_DEFINE1(alarm, unsigned int, seconds) SYSCALL_DEFINE0(getpid) SYSCALL_DEFINE0(getppid) SYSCALL_DEFINE0(getuid) SYSCALL_DEFINE0(geteuid) SYSCALL_DEFINE0(getgid) SYSCALL_DEFINE0(getegid) SYSCALL_DEFINE0(gettid) SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) Only one of those should be in kernel/timer.c. Who wrote this thing? [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Stephen Rothwell Acked-by: Thomas Gleixner Cc: Guenter Roeck Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 211 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/timer.c | 208 +------------------------------------------------------- 2 files changed, 212 insertions(+), 207 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/sys.c b/kernel/sys.c index 0da73cf73e6..e30eba430b9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -49,6 +49,11 @@ #include #include +#include +#include +#include +#include + #include /* Move somewhere else to avoid recompiling? */ #include @@ -1044,6 +1049,67 @@ change_okay: return old_fsgid; } +/** + * sys_getpid - return the thread group id of the current process + * + * Note, despite the name, this returns the tgid not the pid. The tgid and + * the pid are identical unless CLONE_THREAD was specified on clone() in + * which case the tgid is the same in all threads of the same group. + * + * This is SMP safe as current->tgid does not change. + */ +SYSCALL_DEFINE0(getpid) +{ + return task_tgid_vnr(current); +} + +/* Thread ID - the internal kernel "pid" */ +SYSCALL_DEFINE0(gettid) +{ + return task_pid_vnr(current); +} + +/* + * Accessing ->real_parent is not SMP-safe, it could + * change from under us. However, we can use a stale + * value of ->real_parent under rcu_read_lock(), see + * release_task()->call_rcu(delayed_put_task_struct). + */ +SYSCALL_DEFINE0(getppid) +{ + int pid; + + rcu_read_lock(); + pid = task_tgid_vnr(rcu_dereference(current->real_parent)); + rcu_read_unlock(); + + return pid; +} + +SYSCALL_DEFINE0(getuid) +{ + /* Only we change this so SMP safe */ + return from_kuid_munged(current_user_ns(), current_uid()); +} + +SYSCALL_DEFINE0(geteuid) +{ + /* Only we change this so SMP safe */ + return from_kuid_munged(current_user_ns(), current_euid()); +} + +SYSCALL_DEFINE0(getgid) +{ + /* Only we change this so SMP safe */ + return from_kgid_munged(current_user_ns(), current_gid()); +} + +SYSCALL_DEFINE0(getegid) +{ + /* Only we change this so SMP safe */ + return from_kgid_munged(current_user_ns(), current_egid()); +} + void do_sys_times(struct tms *tms) { cputime_t tgutime, tgstime, cutime, cstime; @@ -2245,3 +2311,148 @@ int orderly_poweroff(bool force) return 0; } EXPORT_SYMBOL_GPL(orderly_poweroff); + +/** + * do_sysinfo - fill in sysinfo struct + * @info: pointer to buffer to fill + */ +static int do_sysinfo(struct sysinfo *info) +{ + unsigned long mem_total, sav_total; + unsigned int mem_unit, bitcount; + struct timespec tp; + + memset(info, 0, sizeof(struct sysinfo)); + + ktime_get_ts(&tp); + monotonic_to_bootbased(&tp); + info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); + + get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); + + info->procs = nr_threads; + + si_meminfo(info); + si_swapinfo(info); + + /* + * If the sum of all the available memory (i.e. ram + swap) + * is less than can be stored in a 32 bit unsigned long then + * we can be binary compatible with 2.2.x kernels. If not, + * well, in that case 2.2.x was broken anyways... + * + * -Erik Andersen + */ + + mem_total = info->totalram + info->totalswap; + if (mem_total < info->totalram || mem_total < info->totalswap) + goto out; + bitcount = 0; + mem_unit = info->mem_unit; + while (mem_unit > 1) { + bitcount++; + mem_unit >>= 1; + sav_total = mem_total; + mem_total <<= 1; + if (mem_total < sav_total) + goto out; + } + + /* + * If mem_total did not overflow, multiply all memory values by + * info->mem_unit and set it to 1. This leaves things compatible + * with 2.2.x, and also retains compatibility with earlier 2.4.x + * kernels... + */ + + info->mem_unit = 1; + info->totalram <<= bitcount; + info->freeram <<= bitcount; + info->sharedram <<= bitcount; + info->bufferram <<= bitcount; + info->totalswap <<= bitcount; + info->freeswap <<= bitcount; + info->totalhigh <<= bitcount; + info->freehigh <<= bitcount; + +out: + return 0; +} + +SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) +{ + struct sysinfo val; + + do_sysinfo(&val); + + if (copy_to_user(info, &val, sizeof(struct sysinfo))) + return -EFAULT; + + return 0; +} + +#ifdef CONFIG_COMPAT +struct compat_sysinfo { + s32 uptime; + u32 loads[3]; + u32 totalram; + u32 freeram; + u32 sharedram; + u32 bufferram; + u32 totalswap; + u32 freeswap; + u16 procs; + u16 pad; + u32 totalhigh; + u32 freehigh; + u32 mem_unit; + char _f[20-2*sizeof(u32)-sizeof(int)]; +}; + +COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) +{ + struct sysinfo s; + + do_sysinfo(&s); + + /* Check to see if any memory value is too large for 32-bit and scale + * down if needed + */ + if ((s.totalram >> 32) || (s.totalswap >> 32)) { + int bitcount = 0; + + while (s.mem_unit < PAGE_SIZE) { + s.mem_unit <<= 1; + bitcount++; + } + + s.totalram >>= bitcount; + s.freeram >>= bitcount; + s.sharedram >>= bitcount; + s.bufferram >>= bitcount; + s.totalswap >>= bitcount; + s.freeswap >>= bitcount; + s.totalhigh >>= bitcount; + s.freehigh >>= bitcount; + } + + if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || + __put_user(s.uptime, &info->uptime) || + __put_user(s.loads[0], &info->loads[0]) || + __put_user(s.loads[1], &info->loads[1]) || + __put_user(s.loads[2], &info->loads[2]) || + __put_user(s.totalram, &info->totalram) || + __put_user(s.freeram, &info->freeram) || + __put_user(s.sharedram, &info->sharedram) || + __put_user(s.bufferram, &info->bufferram) || + __put_user(s.totalswap, &info->totalswap) || + __put_user(s.freeswap, &info->freeswap) || + __put_user(s.procs, &info->procs) || + __put_user(s.totalhigh, &info->totalhigh) || + __put_user(s.freehigh, &info->freehigh) || + __put_user(s.mem_unit, &info->mem_unit)) + return -EFAULT; + + return 0; +} +#endif /* CONFIG_COMPAT */ diff --git a/kernel/timer.c b/kernel/timer.c index f0e65885b82..09bca8ce977 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1,7 +1,7 @@ /* * linux/kernel/timer.c * - * Kernel internal timers, basic process system calls + * Kernel internal timers * * Copyright (C) 1991, 1992 Linus Torvalds * @@ -1396,61 +1396,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds) #endif -/** - * sys_getpid - return the thread group id of the current process - * - * Note, despite the name, this returns the tgid not the pid. The tgid and - * the pid are identical unless CLONE_THREAD was specified on clone() in - * which case the tgid is the same in all threads of the same group. - * - * This is SMP safe as current->tgid does not change. - */ -SYSCALL_DEFINE0(getpid) -{ - return task_tgid_vnr(current); -} - -/* - * Accessing ->real_parent is not SMP-safe, it could - * change from under us. However, we can use a stale - * value of ->real_parent under rcu_read_lock(), see - * release_task()->call_rcu(delayed_put_task_struct). - */ -SYSCALL_DEFINE0(getppid) -{ - int pid; - - rcu_read_lock(); - pid = task_tgid_vnr(rcu_dereference(current->real_parent)); - rcu_read_unlock(); - - return pid; -} - -SYSCALL_DEFINE0(getuid) -{ - /* Only we change this so SMP safe */ - return from_kuid_munged(current_user_ns(), current_uid()); -} - -SYSCALL_DEFINE0(geteuid) -{ - /* Only we change this so SMP safe */ - return from_kuid_munged(current_user_ns(), current_euid()); -} - -SYSCALL_DEFINE0(getgid) -{ - /* Only we change this so SMP safe */ - return from_kgid_munged(current_user_ns(), current_gid()); -} - -SYSCALL_DEFINE0(getegid) -{ - /* Only we change this so SMP safe */ - return from_kgid_munged(current_user_ns(), current_egid()); -} - static void process_timeout(unsigned long __data) { wake_up_process((struct task_struct *)__data); @@ -1558,157 +1503,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); -/* Thread ID - the internal kernel "pid" */ -SYSCALL_DEFINE0(gettid) -{ - return task_pid_vnr(current); -} - -/** - * do_sysinfo - fill in sysinfo struct - * @info: pointer to buffer to fill - */ -static int do_sysinfo(struct sysinfo *info) -{ - unsigned long mem_total, sav_total; - unsigned int mem_unit, bitcount; - struct timespec tp; - - memset(info, 0, sizeof(struct sysinfo)); - - ktime_get_ts(&tp); - monotonic_to_bootbased(&tp); - info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); - - get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); - - info->procs = nr_threads; - - si_meminfo(info); - si_swapinfo(info); - - /* - * If the sum of all the available memory (i.e. ram + swap) - * is less than can be stored in a 32 bit unsigned long then - * we can be binary compatible with 2.2.x kernels. If not, - * well, in that case 2.2.x was broken anyways... - * - * -Erik Andersen - */ - - mem_total = info->totalram + info->totalswap; - if (mem_total < info->totalram || mem_total < info->totalswap) - goto out; - bitcount = 0; - mem_unit = info->mem_unit; - while (mem_unit > 1) { - bitcount++; - mem_unit >>= 1; - sav_total = mem_total; - mem_total <<= 1; - if (mem_total < sav_total) - goto out; - } - - /* - * If mem_total did not overflow, multiply all memory values by - * info->mem_unit and set it to 1. This leaves things compatible - * with 2.2.x, and also retains compatibility with earlier 2.4.x - * kernels... - */ - - info->mem_unit = 1; - info->totalram <<= bitcount; - info->freeram <<= bitcount; - info->sharedram <<= bitcount; - info->bufferram <<= bitcount; - info->totalswap <<= bitcount; - info->freeswap <<= bitcount; - info->totalhigh <<= bitcount; - info->freehigh <<= bitcount; - -out: - return 0; -} - -SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) -{ - struct sysinfo val; - - do_sysinfo(&val); - - if (copy_to_user(info, &val, sizeof(struct sysinfo))) - return -EFAULT; - - return 0; -} - -#ifdef CONFIG_COMPAT -struct compat_sysinfo { - s32 uptime; - u32 loads[3]; - u32 totalram; - u32 freeram; - u32 sharedram; - u32 bufferram; - u32 totalswap; - u32 freeswap; - u16 procs; - u16 pad; - u32 totalhigh; - u32 freehigh; - u32 mem_unit; - char _f[20-2*sizeof(u32)-sizeof(int)]; -}; - -COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) -{ - struct sysinfo s; - - do_sysinfo(&s); - - /* Check to see if any memory value is too large for 32-bit and scale - * down if needed - */ - if ((s.totalram >> 32) || (s.totalswap >> 32)) { - int bitcount = 0; - - while (s.mem_unit < PAGE_SIZE) { - s.mem_unit <<= 1; - bitcount++; - } - - s.totalram >>= bitcount; - s.freeram >>= bitcount; - s.sharedram >>= bitcount; - s.bufferram >>= bitcount; - s.totalswap >>= bitcount; - s.freeswap >>= bitcount; - s.totalhigh >>= bitcount; - s.freehigh >>= bitcount; - } - - if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || - __put_user (s.uptime, &info->uptime) || - __put_user (s.loads[0], &info->loads[0]) || - __put_user (s.loads[1], &info->loads[1]) || - __put_user (s.loads[2], &info->loads[2]) || - __put_user (s.totalram, &info->totalram) || - __put_user (s.freeram, &info->freeram) || - __put_user (s.sharedram, &info->sharedram) || - __put_user (s.bufferram, &info->bufferram) || - __put_user (s.totalswap, &info->totalswap) || - __put_user (s.freeswap, &info->freeswap) || - __put_user (s.procs, &info->procs) || - __put_user (s.totalhigh, &info->totalhigh) || - __put_user (s.freehigh, &info->freehigh) || - __put_user (s.mem_unit, &info->mem_unit)) - return -EFAULT; - - return 0; -} -#endif /* CONFIG_COMPAT */ - static int __cpuinit init_timers_cpu(int cpu) { int j; -- cgit v1.2.3-70-g09d2