From ec44bc7acc3687ba6ae8154b4b5a845b70279237 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 May 2012 22:08:57 +0000 Subject: timers: Create detach_if_pending() and use it Most callers of detach_timer() have the same pattern around them. Check whether the timer is pending and eventually updating base->next_timer. Create detach_if_pending() and replace the duplicated code. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Gilad Ben-Yossef Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20120525214819.131246037@linutronix.de --- kernel/timer.c | 56 +++++++++++++++++++++++--------------------------------- 1 file changed, 23 insertions(+), 33 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index 6ec7e7e0db4..0f70deb2015 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -654,8 +654,7 @@ void init_timer_deferrable_key(struct timer_list *timer, } EXPORT_SYMBOL(init_timer_deferrable_key); -static inline void detach_timer(struct timer_list *timer, - int clear_pending) +static inline void detach_timer(struct timer_list *timer, bool clear_pending) { struct list_head *entry = &timer->entry; @@ -667,6 +666,19 @@ static inline void detach_timer(struct timer_list *timer, entry->prev = LIST_POISON2; } +static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, + bool clear_pending) +{ + if (!timer_pending(timer)) + return 0; + + detach_timer(timer, clear_pending); + if (timer->expires == base->next_timer && + !tbase_get_deferrable(timer->base)) + base->next_timer = base->timer_jiffies; + return 1; +} + /* * We are using hashed locking: holding per_cpu(tvec_bases).lock * means that all timers which are tied to this base via timer->base are @@ -712,16 +724,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, base = lock_timer_base(timer, &flags); - if (timer_pending(timer)) { - detach_timer(timer, 0); - if (timer->expires == base->next_timer && - !tbase_get_deferrable(timer->base)) - base->next_timer = base->timer_jiffies; - ret = 1; - } else { - if (pending_only) - goto out_unlock; - } + ret = detach_if_pending(timer, base, false); + if (!ret && pending_only) + goto out_unlock; debug_activate(timer, expires); @@ -959,13 +964,7 @@ int del_timer(struct timer_list *timer) timer_stats_timer_clear_start_info(timer); if (timer_pending(timer)) { base = lock_timer_base(timer, &flags); - if (timer_pending(timer)) { - detach_timer(timer, 1); - if (timer->expires == base->next_timer && - !tbase_get_deferrable(timer->base)) - base->next_timer = base->timer_jiffies; - ret = 1; - } + ret = detach_if_pending(timer, base, true); spin_unlock_irqrestore(&base->lock, flags); } @@ -990,19 +989,10 @@ int try_to_del_timer_sync(struct timer_list *timer) base = lock_timer_base(timer, &flags); - if (base->running_timer == timer) - goto out; - - timer_stats_timer_clear_start_info(timer); - ret = 0; - if (timer_pending(timer)) { - detach_timer(timer, 1); - if (timer->expires == base->next_timer && - !tbase_get_deferrable(timer->base)) - base->next_timer = base->timer_jiffies; - ret = 1; + if (base->running_timer != timer) { + timer_stats_timer_clear_start_info(timer); + ret = detach_if_pending(timer, base, true); } -out: spin_unlock_irqrestore(&base->lock, flags); return ret; @@ -1178,7 +1168,7 @@ static inline void __run_timers(struct tvec_base *base) timer_stats_account_timer(timer); base->running_timer = timer; - detach_timer(timer, 1); + detach_timer(timer, true); spin_unlock_irq(&base->lock); call_timer_fn(timer, fn, data); @@ -1714,7 +1704,7 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea while (!list_empty(head)) { timer = list_first_entry(head, struct timer_list, entry); - detach_timer(timer, 0); + detach_timer(timer, false); timer_set_base(timer, new_base); if (time_before(timer->expires, new_base->next_timer) && !tbase_get_deferrable(timer->base)) -- cgit v1.2.3-70-g09d2 From facbb4a7efbd658046bf615f03cd97a1504785d8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 May 2012 22:08:57 +0000 Subject: timers: Consolidate base->next_timer update Another bunch of mindlessly copied code. All callers of internal_add_timer() except the recascading code updates base->next_timer. Move this into internal_add_timer() and let the cascading code call __internal_add_timer(). Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Gilad Ben-Yossef Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20120525214819.189946224@linutronix.de --- kernel/timer.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index 0f70deb2015..7207690b535 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -330,7 +330,8 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) } EXPORT_SYMBOL_GPL(set_timer_slack); -static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) +static void +__internal_add_timer(struct tvec_base *base, struct timer_list *timer) { unsigned long expires = timer->expires; unsigned long idx = expires - base->timer_jiffies; @@ -372,6 +373,17 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) list_add_tail(&timer->entry, vec); } +static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) +{ + __internal_add_timer(base, timer); + /* + * Update base->next_timer if this is the earliest one. + */ + if (time_before(timer->expires, base->next_timer) && + !tbase_get_deferrable(timer->base)) + base->next_timer = timer->expires; +} + #ifdef CONFIG_TIMER_STATS void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) { @@ -757,9 +769,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, } timer->expires = expires; - if (time_before(timer->expires, base->next_timer) && - !tbase_get_deferrable(timer->base)) - base->next_timer = timer->expires; internal_add_timer(base, timer); out_unlock: @@ -925,9 +934,6 @@ void add_timer_on(struct timer_list *timer, int cpu) spin_lock_irqsave(&base->lock, flags); timer_set_base(timer, base); debug_activate(timer, timer->expires); - if (time_before(timer->expires, base->next_timer) && - !tbase_get_deferrable(timer->base)) - base->next_timer = timer->expires; internal_add_timer(base, timer); /* * Check whether the other CPU is idle and needs to be @@ -1079,7 +1085,8 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index) */ list_for_each_entry_safe(timer, tmp, &tv_list, entry) { BUG_ON(tbase_get_base(timer->base) != base); - internal_add_timer(base, timer); + /* No accounting, while moving them */ + __internal_add_timer(base, timer); } return index; @@ -1706,9 +1713,6 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea timer = list_first_entry(head, struct timer_list, entry); detach_timer(timer, false); timer_set_base(timer, new_base); - if (time_before(timer->expires, new_base->next_timer) && - !tbase_get_deferrable(timer->base)) - new_base->next_timer = timer->expires; internal_add_timer(new_base, timer); } } -- cgit v1.2.3-70-g09d2 From 99d5f3aac674fe081ffddd2dbb8946ccbc14c410 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 May 2012 22:08:58 +0000 Subject: timers: Add accounting of non deferrable timers The code in get_next_timer_interrupt() is suboptimal as it has to run through the cascade to find the next expiring timer. On a completely idle core we should only do that when there is an active timer enqueued and base->next_timer does not give us a fast answer. Add accounting of the active timers to the now consolidated attach/detach code. I deliberately avoided sanity checks because the code is fully symetric and any fiddling with timers w/o using the API functions will lead to cute explosions anyway. ulong is big enough even on 32bit and if we really run into the situation to have more than 1<<32 timers enqueued there, then we are definitely not in a state to go idle and run through that code. This allows us to fix another shortcoming of get_next_timer_interrupt(). Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Gilad Ben-Yossef Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20120525214819.236377028@linutronix.de --- kernel/timer.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index 7207690b535..7fada698bd1 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -77,6 +77,7 @@ struct tvec_base { struct timer_list *running_timer; unsigned long timer_jiffies; unsigned long next_timer; + unsigned long active_timers; struct tvec_root tv1; struct tvec tv2; struct tvec tv3; @@ -377,11 +378,13 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) { __internal_add_timer(base, timer); /* - * Update base->next_timer if this is the earliest one. + * Update base->active_timers and base->next_timer */ - if (time_before(timer->expires, base->next_timer) && - !tbase_get_deferrable(timer->base)) - base->next_timer = timer->expires; + if (!tbase_get_deferrable(timer->base)) { + if (time_before(timer->expires, base->next_timer)) + base->next_timer = timer->expires; + base->active_timers++; + } } #ifdef CONFIG_TIMER_STATS @@ -678,6 +681,14 @@ static inline void detach_timer(struct timer_list *timer, bool clear_pending) entry->prev = LIST_POISON2; } +static inline void +detach_expired_timer(struct timer_list *timer, struct tvec_base *base) +{ + detach_timer(timer, true); + if (!tbase_get_deferrable(timer->base)) + timer->base->active_timers--; +} + static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, bool clear_pending) { @@ -685,9 +696,11 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, return 0; detach_timer(timer, clear_pending); - if (timer->expires == base->next_timer && - !tbase_get_deferrable(timer->base)) - base->next_timer = base->timer_jiffies; + if (!tbase_get_deferrable(timer->base)) { + timer->base->active_timers--; + if (timer->expires == base->next_timer) + base->next_timer = base->timer_jiffies; + } return 1; } @@ -1175,7 +1188,7 @@ static inline void __run_timers(struct tvec_base *base) timer_stats_account_timer(timer); base->running_timer = timer; - detach_timer(timer, true); + detach_expired_timer(timer, base); spin_unlock_irq(&base->lock); call_timer_fn(timer, fn, data); @@ -1701,6 +1714,7 @@ static int __cpuinit init_timers_cpu(int cpu) base->timer_jiffies = jiffies; base->next_timer = base->timer_jiffies; + base->active_timers = 0; return 0; } @@ -1711,6 +1725,7 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea while (!list_empty(head)) { timer = list_first_entry(head, struct timer_list, entry); + /* We ignore the accounting on the dying cpu */ detach_timer(timer, false); timer_set_base(timer, new_base); internal_add_timer(new_base, timer); -- cgit v1.2.3-70-g09d2 From e40468a54882ef7411fb178dbf2e465ec2349af7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 May 2012 22:08:59 +0000 Subject: timers: Improve get_next_timer_interrupt() Gilad reported at http://lkml.kernel.org/r/1336056962-10465-2-git-send-email-gilad@benyossef.com "Current timer code fails to correctly return a value meaning that there is no future timer event, with the result that the timer keeps getting re-armed in HZ one shot mode even when we could turn it off, generating unneeded interrupts. What is happening is that when __next_timer_interrupt() wishes to return a value that signifies "there is no future timer event", it returns (base->timer_jiffies + NEXT_TIMER_MAX_DELTA). However, the code in tick_nohz_stop_sched_tick(), which called __next_timer_interrupt() via get_next_timer_interrupt(), compares the return value to (last_jiffies + NEXT_TIMER_MAX_DELTA) to see if the timer needs to be re-armed. base->timer_jiffies != last_jiffies and so tick_nohz_stop_sched_tick() interperts the return value as indication that there is a distant future event 12 days from now and programs the timer to fire next after KTIME_MAX nsecs instead of avoiding to arm it. This ends up causing a needless interrupt once every KTIME_MAX nsecs." Fix this by using the new active timer accounting. This avoids scans when no active timer is enqueued completely, so we don't have to rely on base->timer_next and base->timer_jiffies anymore. Reported-by: Gilad Ben-Yossef Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20120525214819.317535385@linutronix.de --- kernel/timer.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index 7fada698bd1..a61c09374eb 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1326,18 +1326,21 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now, unsigned long get_next_timer_interrupt(unsigned long now) { struct tvec_base *base = __this_cpu_read(tvec_bases); - unsigned long expires; + unsigned long expires = now + NEXT_TIMER_MAX_DELTA; /* * Pretend that there is no timer pending if the cpu is offline. * Possible pending timers will be migrated later to an active cpu. */ if (cpu_is_offline(smp_processor_id())) - return now + NEXT_TIMER_MAX_DELTA; + return expires; + spin_lock(&base->lock); - if (time_before_eq(base->next_timer, base->timer_jiffies)) - base->next_timer = __next_timer_interrupt(base); - expires = base->next_timer; + if (base->active_timers) { + if (time_before_eq(base->next_timer, base->timer_jiffies)) + base->next_timer = __next_timer_interrupt(base); + expires = base->next_timer; + } spin_unlock(&base->lock); if (time_before_eq(expires, now)) -- cgit v1.2.3-70-g09d2 From be53db6e4edd9dc013b21a929ad2b142dea8b9c0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Aug 2012 14:40:59 +1200 Subject: alpha: take a bunch of syscalls into osf_sys.c New helper: current_thread_info(). Allows to do a bunch of odd syscalls in C. While we are at it, there had never been a reason to do osf_getpriority() in assembler. We also get "namespace"-aware (read: consistent with getuid(2), etc.) behaviour from getx?id() syscalls now. Signed-off-by: Al Viro Signed-off-by: Michael Cree Acked-by: Matt Turner Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/ptrace.h | 5 +- arch/alpha/kernel/entry.S | 109 ---------------------------------------- arch/alpha/kernel/osf_sys.c | 49 ++++++++++++++++++ arch/alpha/kernel/systbls.S | 2 +- kernel/timer.c | 9 ---- 5 files changed, 54 insertions(+), 120 deletions(-) (limited to 'kernel/timer.c') diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h index fd698a174f2..b87755a1955 100644 --- a/arch/alpha/include/asm/ptrace.h +++ b/arch/alpha/include/asm/ptrace.h @@ -76,7 +76,10 @@ struct switch_stack { #define task_pt_regs(task) \ ((struct pt_regs *) (task_stack_page(task) + 2*PAGE_SIZE) - 1) -#define force_successful_syscall_return() (task_pt_regs(current)->r0 = 0) +#define current_pt_regs() \ + ((struct pt_regs *) ((char *)current_thread_info() + 2*PAGE_SIZE) - 1) + +#define force_successful_syscall_return() (current_pt_regs()->r0 = 0) #endif diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 6d159cee5f2..22b0c4d414a 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -796,115 +796,6 @@ sys_rt_sigreturn: br ret_from_sys_call .end sys_rt_sigreturn - .align 4 - .globl sys_sethae - .ent sys_sethae -sys_sethae: - .prologue 0 - stq $16, 152($sp) - ret -.end sys_sethae - - .align 4 - .globl osf_getpriority - .ent osf_getpriority -osf_getpriority: - lda $sp, -16($sp) - stq $26, 0($sp) - .prologue 0 - - jsr $26, sys_getpriority - - ldq $26, 0($sp) - blt $0, 1f - - /* Return value is the unbiased priority, i.e. 20 - prio. - This does result in negative return values, so signal - no error by writing into the R0 slot. */ - lda $1, 20 - stq $31, 16($sp) - subl $1, $0, $0 - unop - -1: lda $sp, 16($sp) - ret -.end osf_getpriority - - .align 4 - .globl sys_getxuid - .ent sys_getxuid -sys_getxuid: - .prologue 0 - ldq $2, TI_TASK($8) - ldq $3, TASK_CRED($2) - ldl $0, CRED_UID($3) - ldl $1, CRED_EUID($3) - stq $1, 80($sp) - ret -.end sys_getxuid - - .align 4 - .globl sys_getxgid - .ent sys_getxgid -sys_getxgid: - .prologue 0 - ldq $2, TI_TASK($8) - ldq $3, TASK_CRED($2) - ldl $0, CRED_GID($3) - ldl $1, CRED_EGID($3) - stq $1, 80($sp) - ret -.end sys_getxgid - - .align 4 - .globl sys_getxpid - .ent sys_getxpid -sys_getxpid: - .prologue 0 - ldq $2, TI_TASK($8) - - /* See linux/kernel/timer.c sys_getppid for discussion - about this loop. */ - ldq $3, TASK_GROUP_LEADER($2) - ldq $4, TASK_REAL_PARENT($3) - ldl $0, TASK_TGID($2) -1: ldl $1, TASK_TGID($4) -#ifdef CONFIG_SMP - mov $4, $5 - mb - ldq $3, TASK_GROUP_LEADER($2) - ldq $4, TASK_REAL_PARENT($3) - cmpeq $4, $5, $5 - beq $5, 1b -#endif - stq $1, 80($sp) - ret -.end sys_getxpid - - .align 4 - .globl sys_alpha_pipe - .ent sys_alpha_pipe -sys_alpha_pipe: - lda $sp, -16($sp) - stq $26, 0($sp) - .prologue 0 - - mov $31, $17 - lda $16, 8($sp) - jsr $26, do_pipe_flags - - ldq $26, 0($sp) - bne $0, 1f - - /* The return values are in $0 and $20. */ - ldl $1, 12($sp) - ldl $0, 8($sp) - - stq $1, 80+16($sp) -1: lda $sp, 16($sp) - ret -.end sys_alpha_pipe - .align 4 .globl sys_execve .ent sys_execve diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 98a103621af..bc1acdda7a5 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1404,3 +1404,52 @@ SYSCALL_DEFINE3(osf_writev, unsigned long, fd, } #endif + +SYSCALL_DEFINE2(osf_getpriority, int, which, int, who) +{ + int prio = sys_getpriority(which, who); + if (prio >= 0) { + /* Return value is the unbiased priority, i.e. 20 - prio. + This does result in negative return values, so signal + no error */ + force_successful_syscall_return(); + prio = 20 - prio; + } + return prio; +} + +SYSCALL_DEFINE0(getxuid) +{ + current_pt_regs()->r20 = sys_geteuid(); + return sys_getuid(); +} + +SYSCALL_DEFINE0(getxgid) +{ + current_pt_regs()->r20 = sys_getegid(); + return sys_getgid(); +} + +SYSCALL_DEFINE0(getxpid) +{ + current_pt_regs()->r20 = sys_getppid(); + return sys_getpid(); +} + +SYSCALL_DEFINE0(alpha_pipe) +{ + int fd[2]; + int res = do_pipe_flags(fd, 0); + if (!res) { + /* The return values are in $0 and $20. */ + current_pt_regs()->r20 = fd[1]; + res = fd[0]; + } + return res; +} + +SYSCALL_DEFINE1(sethae, unsigned long, val) +{ + current_pt_regs()->hae = val; + return 0; +} diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index b64157c98e0..2ac6b45c3e0 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -111,7 +111,7 @@ sys_call_table: .quad sys_socket .quad sys_connect .quad sys_accept - .quad osf_getpriority /* 100 */ + .quad sys_osf_getpriority /* 100 */ .quad sys_send .quad sys_recv .quad sys_sigreturn diff --git a/kernel/timer.c b/kernel/timer.c index a61c09374eb..8c5e7b908c6 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1407,13 +1407,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds) #endif -#ifndef __alpha__ - -/* - * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this - * should be moved into arch/i386 instead? - */ - /** * sys_getpid - return the thread group id of the current process * @@ -1469,8 +1462,6 @@ SYSCALL_DEFINE0(getegid) return from_kgid_munged(current_user_ns(), current_egid()); } -#endif - static void process_timeout(unsigned long __data) { wake_up_process((struct task_struct *)__data); -- cgit v1.2.3-70-g09d2