Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile        |   3
-rw-r--r--  kernel/compat.c        |  82
-rw-r--r--  kernel/cpu.c           |  29
-rw-r--r--  kernel/exit.c          |   8
-rw-r--r--  kernel/fork.c          |  10
-rw-r--r--  kernel/futex.c         | 170
-rw-r--r--  kernel/futex_compat.c  | 142
-rw-r--r--  kernel/hrtimer.c       | 193
-rw-r--r--  kernel/irq/manage.c    |   1
-rw-r--r--  kernel/itimer.c        |  14
-rw-r--r--  kernel/kprobes.c       |  10
-rw-r--r--  kernel/module.c        |  22
-rw-r--r--  kernel/panic.c         |   4
-rw-r--r--  kernel/params.c        |   2
-rw-r--r--  kernel/posix-timers.c  |  67
-rw-r--r--  kernel/power/swap.c    |   7
-rw-r--r--  kernel/profile.c       |  53
-rw-r--r--  kernel/rcutorture.c    |   4
-rw-r--r--  kernel/sched.c         | 163
-rw-r--r--  kernel/softlockup.c    |   2
-rw-r--r--  kernel/sys.c           | 327
-rw-r--r--  kernel/sys_ni.c        |   4
-rw-r--r--  kernel/time.c          |   4
23 files changed, 963 insertions, 358 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index ff1c11dc12c..58908f9d156 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -12,6 +12,9 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o obj-$(CONFIG_FUTEX) += futex.o +ifeq ($(CONFIG_COMPAT),y) +obj-$(CONFIG_FUTEX) += futex_compat.o +endif obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += cpu.o spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o diff --git a/kernel/compat.c b/kernel/compat.c index 8c9cd88b678..c1601a84f8d 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -17,10 +17,10 @@ #include <linux/time.h> #include <linux/signal.h> #include <linux/sched.h> /* for MAX_SCHEDULE_TIMEOUT */ -#include <linux/futex.h> /* for FUTEX_WAIT */ #include <linux/syscalls.h> #include <linux/unistd.h> #include <linux/security.h> +#include <linux/timex.h> #include <asm/uaccess.h> @@ -238,28 +238,6 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, return ret; } -#ifdef CONFIG_FUTEX -asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val, - struct compat_timespec __user *utime, u32 __user *uaddr2, - int val3) -{ - struct timespec t; - unsigned long timeout = MAX_SCHEDULE_TIMEOUT; - int val2 = 0; - - if ((op == FUTEX_WAIT) && utime) { - if (get_compat_timespec(&t, utime)) - return -EFAULT; - timeout = timespec_to_jiffies(&t) + 1; - } - if (op >= FUTEX_REQUEUE) - val2 = (int) (unsigned long) utime; - - return do_futex((unsigned long)uaddr, op, val, timeout, - (unsigned long)uaddr2, val2, val3); -} -#endif - asmlinkage long compat_sys_setrlimit(unsigned int resource, struct compat_rlimit __user *rlim) { @@ -898,3 +876,61 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat return -ERESTARTNOHAND; } #endif /* __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND */ + +asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) +{ + struct timex txc; + int ret; + + memset(&txc, 0, sizeof(struct timex)); + + if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) || + __get_user(txc.modes, &utp->modes) || + __get_user(txc.offset, &utp->offset) || + __get_user(txc.freq, &utp->freq) || + __get_user(txc.maxerror, &utp->maxerror) || + __get_user(txc.esterror, &utp->esterror) || + __get_user(txc.status, &utp->status) || + __get_user(txc.constant, &utp->constant) || + __get_user(txc.precision, &utp->precision) || + __get_user(txc.tolerance, &utp->tolerance) || + __get_user(txc.time.tv_sec, &utp->time.tv_sec) || + __get_user(txc.time.tv_usec, &utp->time.tv_usec) || + __get_user(txc.tick, &utp->tick) || + __get_user(txc.ppsfreq, &utp->ppsfreq) || + __get_user(txc.jitter, &utp->jitter) || + __get_user(txc.shift, &utp->shift) || + __get_user(txc.stabil, &utp->stabil) || + __get_user(txc.jitcnt, &utp->jitcnt) || + __get_user(txc.calcnt, &utp->calcnt) || + __get_user(txc.errcnt, &utp->errcnt) || + __get_user(txc.stbcnt, &utp->stbcnt)) + return -EFAULT; + + ret = do_adjtimex(&txc); + + if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) || + __put_user(txc.modes, &utp->modes) || + __put_user(txc.offset, &utp->offset) || + __put_user(txc.freq, &utp->freq) || + __put_user(txc.maxerror, &utp->maxerror) || + __put_user(txc.esterror, &utp->esterror) || + __put_user(txc.status, &utp->status) || + __put_user(txc.constant, &utp->constant) || + __put_user(txc.precision, &utp->precision) || + __put_user(txc.tolerance, &utp->tolerance) || + __put_user(txc.time.tv_sec, &utp->time.tv_sec) || + 
__put_user(txc.time.tv_usec, &utp->time.tv_usec) || + __put_user(txc.tick, &utp->tick) || + __put_user(txc.ppsfreq, &utp->ppsfreq) || + __put_user(txc.jitter, &utp->jitter) || + __put_user(txc.shift, &utp->shift) || + __put_user(txc.stabil, &utp->stabil) || + __put_user(txc.jitcnt, &utp->jitcnt) || + __put_user(txc.calcnt, &utp->calcnt) || + __put_user(txc.errcnt, &utp->errcnt) || + __put_user(txc.stbcnt, &utp->stbcnt)) + ret = -EFAULT; + + return ret; +} diff --git a/kernel/cpu.c b/kernel/cpu.c index 8be22bd8093..fe2b8d0bfe4 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -18,7 +18,7 @@ /* This protects CPUs going up and down... */ static DECLARE_MUTEX(cpucontrol); -static struct notifier_block *cpu_chain; +static BLOCKING_NOTIFIER_HEAD(cpu_chain); #ifdef CONFIG_HOTPLUG_CPU static struct task_struct *lock_cpu_hotplug_owner; @@ -71,21 +71,13 @@ EXPORT_SYMBOL_GPL(lock_cpu_hotplug_interruptible); /* Need to know about CPUs going up/down? */ int register_cpu_notifier(struct notifier_block *nb) { - int ret; - - if ((ret = lock_cpu_hotplug_interruptible()) != 0) - return ret; - ret = notifier_chain_register(&cpu_chain, nb); - unlock_cpu_hotplug(); - return ret; + return blocking_notifier_chain_register(&cpu_chain, nb); } EXPORT_SYMBOL(register_cpu_notifier); void unregister_cpu_notifier(struct notifier_block *nb) { - lock_cpu_hotplug(); - notifier_chain_unregister(&cpu_chain, nb); - unlock_cpu_hotplug(); + blocking_notifier_chain_unregister(&cpu_chain, nb); } EXPORT_SYMBOL(unregister_cpu_notifier); @@ -141,7 +133,7 @@ int cpu_down(unsigned int cpu) goto out; } - err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, + err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, (void *)(long)cpu); if (err == NOTIFY_BAD) { printk("%s: attempt to take down CPU %u failed\n", @@ -159,7 +151,7 @@ int cpu_down(unsigned int cpu) p = __stop_machine_run(take_cpu_down, NULL, cpu); if (IS_ERR(p)) { /* CPU didn't die: tell everyone. Can't complain. */ - if (notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, + if (blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, (void *)(long)cpu) == NOTIFY_BAD) BUG(); @@ -182,8 +174,8 @@ int cpu_down(unsigned int cpu) put_cpu(); /* CPU is completely dead: tell everyone. Too late to complain. */ - if (notifier_call_chain(&cpu_chain, CPU_DEAD, (void *)(long)cpu) - == NOTIFY_BAD) + if (blocking_notifier_call_chain(&cpu_chain, CPU_DEAD, + (void *)(long)cpu) == NOTIFY_BAD) BUG(); check_for_tasks(cpu); @@ -211,7 +203,7 @@ int __devinit cpu_up(unsigned int cpu) goto out; } - ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); + ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); if (ret == NOTIFY_BAD) { printk("%s: attempt to bring up CPU %u failed\n", __FUNCTION__, cpu); @@ -226,11 +218,12 @@ int __devinit cpu_up(unsigned int cpu) BUG_ON(!cpu_online(cpu)); /* Now call notifier in preparation. 
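[Note: after this conversion the CPU chain is an ordinary blocking notifier chain; registration may sleep, but it no longer takes the hotplug lock itself. For context, a minimal client of this interface might look like the sketch below. The callback body and the example_* names are hypothetical, not part of this patch:

#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/init.h>

/* Hypothetical CPU hotplug client. */
static int example_cpu_callback(struct notifier_block *nb,
				unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;

	switch (action) {
	case CPU_ONLINE:
		printk(KERN_INFO "example: CPU %u is up\n", cpu);
		break;
	case CPU_DEAD:
		printk(KERN_INFO "example: CPU %u is down\n", cpu);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block example_cpu_nb = {
	.notifier_call = example_cpu_callback,
};

static int __init example_init(void)
{
	/* May sleep, but no longer deadlocks if the caller already
	 * holds the CPU hotplug lock. */
	return register_cpu_notifier(&example_cpu_nb);
}

End of note.]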
*/ - notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu); + blocking_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu); out_notify: if (ret != 0) - notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); + blocking_notifier_call_chain(&cpu_chain, + CPU_UP_CANCELED, hcpu); out: unlock_cpu_hotplug(); return ret; diff --git a/kernel/exit.c b/kernel/exit.c index 8037405e136..a8c7efc7a68 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -31,6 +31,8 @@ #include <linux/signal.h> #include <linux/cn_proc.h> #include <linux/mutex.h> +#include <linux/futex.h> +#include <linux/compat.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -852,6 +854,12 @@ fastcall NORET_TYPE void do_exit(long code) exit_itimers(tsk->signal); acct_process(code); } + if (unlikely(tsk->robust_list)) + exit_robust_list(tsk); +#ifdef CONFIG_COMPAT + if (unlikely(tsk->compat_robust_list)) + compat_exit_robust_list(tsk); +#endif exit_mm(tsk); exit_sem(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index a02063903aa..c49bd193b05 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -769,8 +769,7 @@ int unshare_files(void) struct files_struct *files = current->files; int rc; - if(!files) - BUG(); + BUG_ON(!files); /* This can race but the race causes us to copy when we don't need to and drop the copy */ @@ -848,7 +847,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL); sig->it_real_incr.tv64 = 0; sig->real_timer.function = it_real_fn; - sig->real_timer.data = tsk; + sig->tsk = tsk; sig->it_virt_expires = cputime_zero; sig->it_virt_incr = cputime_zero; @@ -1062,7 +1061,10 @@ static task_t *copy_process(unsigned long clone_flags, * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; - + p->robust_list = NULL; +#ifdef CONFIG_COMPAT + p->compat_robust_list = NULL; +#endif /* * sigaltstack should be cleared when sharing the same VM */ diff --git a/kernel/futex.c b/kernel/futex.c index 5efa2f97803..9c9b2b6b22d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -8,6 +8,10 @@ * Removed page pinning, fix privately mapped COW pages and other cleanups * (C) Copyright 2003, 2004 Jamie Lokier * + * Robust futex support started by Ingo Molnar + * (C) Copyright 2006 Red Hat Inc, All Rights Reserved + * Thanks to Thomas Gleixner for suggestions, analysis and fixes. + * * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly * enough at me, Linus for the original (flawed) idea, Matthew * Kirkwood for proof-of-concept implementation. @@ -829,6 +833,172 @@ error: goto out; } +/* + * Support for robust futexes: the kernel cleans up held futexes at + * thread exit time. + * + * Implementation: user-space maintains a per-thread list of locks it + * is holding. Upon do_exit(), the kernel carefully walks this list, + * and marks all locks that are owned by this thread with the + * FUTEX_OWNER_DEAD bit, and wakes up a waiter (if any). The list is + * always manipulated with the lock held, so the list is private and + * per-thread. Userspace also maintains a per-thread 'list_op_pending' + * field, to allow the kernel to clean up if the thread dies after + * acquiring the lock, but just before it could have added itself to + * the list. There can only be one such pending lock. 
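[Note: everything on the kernel side above is driven by a per-thread structure that userspace registers once, early in a thread's life. A rough userspace sketch follows; the struct layout mirrors the robust_list_head this series adds to include/linux/futex.h, and SYS_set_robust_list is assumed to be available from <sys/syscall.h> (on older libcs the syscall number may need to be supplied by hand):

#include <sys/syscall.h>
#include <unistd.h>

/* Userspace mirror of the kernel's robust-list layout. */
struct robust_list {
	struct robust_list *next;
};
struct robust_list_head {
	struct robust_list list;	/* circular list of held locks */
	long futex_offset;		/* entry -> futex word offset (may be negative) */
	struct robust_list *list_op_pending; /* lock being acquired right now, if any */
};

static struct robust_list_head robust_head;

static int register_robust_list(void)
{
	robust_head.list.next = &robust_head.list; /* empty list points at itself */
	robust_head.futex_offset = 0;	/* assumption: lock word at offset 0 of an entry */
	robust_head.list_op_pending = NULL;
	return syscall(SYS_set_robust_list, &robust_head, sizeof(robust_head));
}

From then on the thread links each lock it takes into robust_head.list, and sets list_op_pending around the acquire itself, exactly as the comment above describes. End of note.]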
+ */ + +/** + * sys_set_robust_list - set the robust-futex list head of a task + * @head: pointer to the list-head + * @len: length of the list-head, as userspace expects + */ +asmlinkage long +sys_set_robust_list(struct robust_list_head __user *head, + size_t len) +{ + /* + * The kernel knows only one size for now: + */ + if (unlikely(len != sizeof(*head))) + return -EINVAL; + + current->robust_list = head; + + return 0; +} + +/** + * sys_get_robust_list - get the robust-futex list head of a task + * @pid: pid of the process [zero for current task] + * @head_ptr: pointer to a list-head pointer, the kernel fills it in + * @len_ptr: pointer to a length field, the kernel fills in the header size + */ +asmlinkage long +sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr, + size_t __user *len_ptr) +{ + struct robust_list_head *head; + unsigned long ret; + + if (!pid) + head = current->robust_list; + else { + struct task_struct *p; + + ret = -ESRCH; + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + if (!p) + goto err_unlock; + ret = -EPERM; + if ((current->euid != p->euid) && (current->euid != p->uid) && + !capable(CAP_SYS_PTRACE)) + goto err_unlock; + head = p->robust_list; + read_unlock(&tasklist_lock); + } + + if (put_user(sizeof(*head), len_ptr)) + return -EFAULT; + return put_user(head, head_ptr); + +err_unlock: + read_unlock(&tasklist_lock); + + return ret; +} + +/* + * Process a futex-list entry, check whether it's owned by the + * dying task, and do notification if so: + */ +int handle_futex_death(u32 __user *uaddr, struct task_struct *curr) +{ + u32 uval; + +retry: + if (get_user(uval, uaddr)) + return -1; + + if ((uval & FUTEX_TID_MASK) == curr->pid) { + /* + * Ok, this dying thread is truly holding a futex + * of interest. Set the OWNER_DIED bit atomically + * via cmpxchg, and if the value had FUTEX_WAITERS + * set, wake up a waiter (if any). (We have to do a + * futex_wake() even if OWNER_DIED is already set - + * to handle the rare but possible case of recursive + * thread-death.) The rest of the cleanup is done in + * userspace. + */ + if (futex_atomic_cmpxchg_inatomic(uaddr, uval, + uval | FUTEX_OWNER_DIED) != uval) + goto retry; + + if (uval & FUTEX_WAITERS) + futex_wake((unsigned long)uaddr, 1); + } + return 0; +} + +/* + * Walk curr->robust_list (very carefully, it's a userspace list!) + * and mark any locks found there dead, and notify any waiters. + * + * We silently return on any sign of list-walking problem. 
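[Note: handle_futex_death() only marks the lock word and wakes one waiter; the actual recovery happens in the thread that acquires the lock next. A hedged userspace sketch of that side follows. FUTEX_WAITERS and FUTEX_OWNER_DIED are the values this series adds to include/linux/futex.h; the trylock itself, its return conventions, and the use of GCC's __sync builtin are illustrative assumptions:

#include <errno.h>
#include <linux/futex.h>	/* FUTEX_WAITERS, FUTEX_OWNER_DIED */

/* Illustrative acquire path; tid is the caller's kernel TID. */
static int robust_trylock(unsigned int *lock, unsigned int tid)
{
	unsigned int val;

	/* fast path: 0 -> TID claims a free, clean lock */
	if (__sync_bool_compare_and_swap(lock, 0, tid))
		return 0;

	val = *lock;
	if (val & FUTEX_OWNER_DIED) {
		/* The kernel marked the word in handle_futex_death():
		 * the owner exited while holding the lock. Take it
		 * over, keeping the waiters bit, and tell the caller
		 * the protected data may be inconsistent. */
		if (__sync_bool_compare_and_swap(lock, val,
				tid | (val & FUTEX_WAITERS)))
			return EOWNERDEAD;
	}
	return EBUSY;	/* held by a live owner; a full lock would FUTEX_WAIT here */
}

End of note.]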
+ */ +void exit_robust_list(struct task_struct *curr) +{ + struct robust_list_head __user *head = curr->robust_list; + struct robust_list __user *entry, *pending; + unsigned int limit = ROBUST_LIST_LIMIT; + unsigned long futex_offset; + + /* + * Fetch the list head (which was registered earlier, via + * sys_set_robust_list()): + */ + if (get_user(entry, &head->list.next)) + return; + /* + * Fetch the relative futex offset: + */ + if (get_user(futex_offset, &head->futex_offset)) + return; + /* + * Fetch any possibly pending lock-add first, and handle it + * if it exists: + */ + if (get_user(pending, &head->list_op_pending)) + return; + if (pending) + handle_futex_death((void *)pending + futex_offset, curr); + + while (entry != &head->list) { + /* + * A pending lock might already be on the list, so + * dont process it twice: + */ + if (entry != pending) + if (handle_futex_death((void *)entry + futex_offset, + curr)) + return; + /* + * Fetch the next entry in the list: + */ + if (get_user(entry, &entry->next)) + return; + /* + * Avoid excessively long or circular lists: + */ + if (!--limit) + break; + + cond_resched(); + } +} + long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, unsigned long uaddr2, int val2, int val3) { diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c new file mode 100644 index 00000000000..54274fc8532 --- /dev/null +++ b/kernel/futex_compat.c @@ -0,0 +1,142 @@ +/* + * linux/kernel/futex_compat.c + * + * Futex compatibililty routines. + * + * Copyright 2006, Red Hat, Inc., Ingo Molnar + */ + +#include <linux/linkage.h> +#include <linux/compat.h> +#include <linux/futex.h> + +#include <asm/uaccess.h> + +/* + * Walk curr->robust_list (very carefully, it's a userspace list!) + * and mark any locks found there dead, and notify any waiters. + * + * We silently return on any sign of list-walking problem. 
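[Note: the compat walker below is line-for-line the native one; the only real difference is pointer width. The list links are 32-bit compat_uptr_t values that must pass through compat_ptr() before they may be dereferenced. The pattern, pulled out into a hypothetical kernel-side helper (names invented for illustration):

#include <linux/compat.h>
#include <linux/futex.h>
#include <linux/errno.h>
#include <asm/uaccess.h>

/* Hypothetical helper: fetch one 32-bit user pointer and widen it. */
static int fetch_compat_entry(compat_uptr_t __user *slot,
			      struct robust_list __user **entry)
{
	compat_uptr_t uentry;

	if (get_user(uentry, slot))
		return -EFAULT;
	*entry = compat_ptr(uentry);	/* zero-extend to a native user pointer */
	return 0;
}

End of note.]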
+ */ +void compat_exit_robust_list(struct task_struct *curr) +{ + struct compat_robust_list_head __user *head = curr->compat_robust_list; + struct robust_list __user *entry, *pending; + compat_uptr_t uentry, upending; + unsigned int limit = ROBUST_LIST_LIMIT; + compat_long_t futex_offset; + + /* + * Fetch the list head (which was registered earlier, via + * sys_set_robust_list()): + */ + if (get_user(uentry, &head->list.next)) + return; + entry = compat_ptr(uentry); + /* + * Fetch the relative futex offset: + */ + if (get_user(futex_offset, &head->futex_offset)) + return; + /* + * Fetch any possibly pending lock-add first, and handle it + * if it exists: + */ + if (get_user(upending, &head->list_op_pending)) + return; + pending = compat_ptr(upending); + if (upending) + handle_futex_death((void *)pending + futex_offset, curr); + + while (compat_ptr(uentry) != &head->list) { + /* + * A pending lock might already be on the list, so + * dont process it twice: + */ + if (entry != pending) + if (handle_futex_death((void *)entry + futex_offset, + curr)) + return; + + /* + * Fetch the next entry in the list: + */ + if (get_user(uentry, (compat_uptr_t *)&entry->next)) + return; + entry = compat_ptr(uentry); + /* + * Avoid excessively long or circular lists: + */ + if (!--limit) + break; + + cond_resched(); + } +} + +asmlinkage long +compat_sys_set_robust_list(struct compat_robust_list_head __user *head, + compat_size_t len) +{ + if (unlikely(len != sizeof(*head))) + return -EINVAL; + + current->compat_robust_list = head; + + return 0; +} + +asmlinkage long +compat_sys_get_robust_list(int pid, compat_uptr_t *head_ptr, + compat_size_t __user *len_ptr) +{ + struct compat_robust_list_head *head; + unsigned long ret; + + if (!pid) + head = current->compat_robust_list; + else { + struct task_struct *p; + + ret = -ESRCH; + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + if (!p) + goto err_unlock; + ret = -EPERM; + if ((current->euid != p->euid) && (current->euid != p->uid) && + !capable(CAP_SYS_PTRACE)) + goto err_unlock; + head = p->compat_robust_list; + read_unlock(&tasklist_lock); + } + + if (put_user(sizeof(*head), len_ptr)) + return -EFAULT; + return put_user(ptr_to_compat(head), head_ptr); + +err_unlock: + read_unlock(&tasklist_lock); + + return ret; +} + +asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, + struct compat_timespec __user *utime, u32 __user *uaddr2, + u32 val3) +{ + struct timespec t; + unsigned long timeout = MAX_SCHEDULE_TIMEOUT; + int val2 = 0; + + if ((op == FUTEX_WAIT) && utime) { + if (get_compat_timespec(&t, utime)) + return -EFAULT; + timeout = timespec_to_jiffies(&t) + 1; + } + if (op >= FUTEX_REQUEUE) + val2 = (int) (unsigned long) utime; + + return do_futex((unsigned long)uaddr, op, val, timeout, + (unsigned long)uaddr2, val2, val3); +} diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 14bc9cfa639..0237a556eb1 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -123,6 +123,26 @@ void ktime_get_ts(struct timespec *ts) EXPORT_SYMBOL_GPL(ktime_get_ts); /* + * Get the coarse grained time at the softirq based on xtime and + * wall_to_monotonic. 
+ */ +static void hrtimer_get_softirq_time(struct hrtimer_base *base) +{ + ktime_t xtim, tomono; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + xtim = timespec_to_ktime(xtime); + tomono = timespec_to_ktime(wall_to_monotonic); + + } while (read_seqretry(&xtime_lock, seq)); + + base[CLOCK_REALTIME].softirq_time = xtim; + base[CLOCK_MONOTONIC].softirq_time = ktime_add(xtim, tomono); +} + +/* * Functions and macros which are different for UP/SMP systems are kept in a * single place */ @@ -246,7 +266,7 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec) /* * Divide a ktime value by a nanosecond value */ -static unsigned long ktime_divns(const ktime_t kt, nsec_t div) +static unsigned long ktime_divns(const ktime_t kt, s64 div) { u64 dclc, inc, dns; int sft = 0; @@ -281,18 +301,17 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) * hrtimer_forward - forward the timer expiry * * @timer: hrtimer to forward + * @now: forward past this time * @interval: the interval to forward * * Forward the timer expiry so it will expire in the future. * Returns the number of overruns. */ unsigned long -hrtimer_forward(struct hrtimer *timer, ktime_t interval) +hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) { unsigned long orun = 1; - ktime_t delta, now; - - now = timer->base->get_time(); + ktime_t delta; delta = ktime_sub(now, timer->expires); @@ -303,7 +322,7 @@ hrtimer_forward(struct hrtimer *timer, ktime_t interval) interval.tv64 = timer->base->resolution.tv64; if (unlikely(delta.tv64 >= interval.tv64)) { - nsec_t incr = ktime_to_ns(interval); + s64 incr = ktime_to_ns(interval); orun = ktime_divns(delta, incr); timer->expires = ktime_add_ns(timer->expires, incr * orun); @@ -355,8 +374,6 @@ static void enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) rb_link_node(&timer->node, parent, link); rb_insert_color(&timer->node, &base->active); - timer->state = HRTIMER_PENDING; - if (!base->first || timer->expires.tv64 < rb_entry(base->first, struct hrtimer, node)->expires.tv64) base->first = &timer->node; @@ -376,6 +393,7 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) if (base->first == &timer->node) base->first = rb_next(&timer->node); rb_erase(&timer->node, &base->active); + timer->node.rb_parent = HRTIMER_INACTIVE; } /* @@ -386,7 +404,6 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) { if (hrtimer_active(timer)) { __remove_hrtimer(timer, base); - timer->state = HRTIMER_INACTIVE; return 1; } return 0; @@ -560,6 +577,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, clock_id = CLOCK_MONOTONIC; timer->base = &bases[clock_id]; + timer->node.rb_parent = HRTIMER_INACTIVE; } /** @@ -586,48 +604,35 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) */ static inline void run_hrtimer_queue(struct hrtimer_base *base) { - ktime_t now = base->get_time(); struct rb_node *node; + if (base->get_softirq_time) + base->softirq_time = base->get_softirq_time(); + spin_lock_irq(&base->lock); while ((node = base->first)) { struct hrtimer *timer; - int (*fn)(void *); + int (*fn)(struct hrtimer *); int restart; - void *data; timer = rb_entry(node, struct hrtimer, node); - if (now.tv64 <= timer->expires.tv64) + if (base->softirq_time.tv64 <= timer->expires.tv64) break; fn = timer->function; - data = timer->data; set_curr_timer(base, timer); - timer->state = HRTIMER_RUNNING; __remove_hrtimer(timer, base); spin_unlock_irq(&base->lock); - /* - * fn == NULL is 
special case for the simplest timer - * variant - wake up process and do not restart: - */ - if (!fn) { - wake_up_process(data); - restart = HRTIMER_NORESTART; - } else - restart = fn(data); + restart = fn(timer); spin_lock_irq(&base->lock); - /* Another CPU has added back the timer */ - if (timer->state != HRTIMER_RUNNING) - continue; - - if (restart == HRTIMER_RESTART) + if (restart != HRTIMER_NORESTART) { + BUG_ON(hrtimer_active(timer)); enqueue_hrtimer(timer, base); - else - timer->state = HRTIMER_EXPIRED; + } } set_curr_timer(base, NULL); spin_unlock_irq(&base->lock); @@ -641,6 +646,8 @@ void hrtimer_run_queues(void) struct hrtimer_base *base = __get_cpu_var(hrtimer_bases); int i; + hrtimer_get_softirq_time(base); + for (i = 0; i < MAX_HRTIMER_BASES; i++) run_hrtimer_queue(&base[i]); } @@ -649,79 +656,70 @@ void hrtimer_run_queues(void) * Sleep related functions: */ -/** - * schedule_hrtimer - sleep until timeout - * - * @timer: hrtimer variable initialized with the correct clock base - * @mode: timeout value is abs/rel - * - * Make the current task sleep until @timeout is - * elapsed. - * - * You can set the task state as follows - - * - * %TASK_UNINTERRUPTIBLE - at least @timeout is guaranteed to - * pass before the routine returns. The routine will return 0 - * - * %TASK_INTERRUPTIBLE - the routine may return early if a signal is - * delivered to the current task. In this case the remaining time - * will be returned - * - * The current task state is guaranteed to be TASK_RUNNING when this - * routine returns. - */ -static ktime_t __sched -schedule_hrtimer(struct hrtimer *timer, const enum hrtimer_mode mode) -{ - /* fn stays NULL, meaning single-shot wakeup: */ - timer->data = current; +struct sleep_hrtimer { + struct hrtimer timer; + struct task_struct *task; + int expired; +}; - hrtimer_start(timer, timer->expires, mode); +static int nanosleep_wakeup(struct hrtimer *timer) +{ + struct sleep_hrtimer *t = + container_of(timer, struct sleep_hrtimer, timer); - schedule(); - hrtimer_cancel(timer); + t->expired = 1; + wake_up_process(t->task); - /* Return the remaining time: */ - if (timer->state != HRTIMER_EXPIRED) - return ktime_sub(timer->expires, timer->base->get_time()); - else - return (ktime_t) {.tv64 = 0 }; + return HRTIMER_NORESTART; } -static inline ktime_t __sched -schedule_hrtimer_interruptible(struct hrtimer *timer, - const enum hrtimer_mode mode) +static int __sched do_nanosleep(struct sleep_hrtimer *t, enum hrtimer_mode mode) { - set_current_state(TASK_INTERRUPTIBLE); + t->timer.function = nanosleep_wakeup; + t->task = current; + t->expired = 0; + + do { + set_current_state(TASK_INTERRUPTIBLE); + hrtimer_start(&t->timer, t->timer.expires, mode); + + schedule(); + + if (unlikely(!t->expired)) { + hrtimer_cancel(&t->timer); + mode = HRTIMER_ABS; + } + } while (!t->expired && !signal_pending(current)); - return schedule_hrtimer(timer, mode); + return t->expired; } static long __sched nanosleep_restart(struct restart_block *restart) { + struct sleep_hrtimer t; struct timespec __user *rmtp; struct timespec tu; - void *rfn_save = restart->fn; - struct hrtimer timer; - ktime_t rem; + ktime_t time; restart->fn = do_no_restart_syscall; - hrtimer_init(&timer, (clockid_t) restart->arg3, HRTIMER_ABS); - - timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0; - - rem = schedule_hrtimer_interruptible(&timer, HRTIMER_ABS); + hrtimer_init(&t.timer, restart->arg3, HRTIMER_ABS); + t.timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0; - if (rem.tv64 <= 0) 
+ if (do_nanosleep(&t, HRTIMER_ABS)) return 0; rmtp = (struct timespec __user *) restart->arg2; - tu = ktime_to_timespec(rem); - if (rmtp && copy_to_user(rmtp, &tu, sizeof(tu))) - return -EFAULT; + if (rmtp) { + time = ktime_sub(t.timer.expires, t.timer.base->get_time()); + if (time.tv64 <= 0) + return 0; + tu = ktime_to_timespec(time); + if (copy_to_user(rmtp, &tu, sizeof(tu))) + return -EFAULT; + } - restart->fn = rfn_save; + restart->fn = nanosleep_restart; /* The other values in restart are already filled in */ return -ERESTART_RESTARTBLOCK; @@ -731,33 +729,34 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, const enum hrtimer_mode mode, const clockid_t clockid) { struct restart_block *restart; - struct hrtimer timer; + struct sleep_hrtimer t; struct timespec tu; ktime_t rem; - hrtimer_init(&timer, clockid, mode); - - timer.expires = timespec_to_ktime(*rqtp); - - rem = schedule_hrtimer_interruptible(&timer, mode); - if (rem.tv64 <= 0) + hrtimer_init(&t.timer, clockid, mode); + t.timer.expires = timespec_to_ktime(*rqtp); + if (do_nanosleep(&t, mode)) return 0; /* Absolute timers do not update the rmtp value and restart: */ if (mode == HRTIMER_ABS) return -ERESTARTNOHAND; - tu = ktime_to_timespec(rem); - - if (rmtp && copy_to_user(rmtp, &tu, sizeof(tu))) - return -EFAULT; + if (rmtp) { + rem = ktime_sub(t.timer.expires, t.timer.base->get_time()); + if (rem.tv64 <= 0) + return 0; + tu = ktime_to_timespec(rem); + if (copy_to_user(rmtp, &tu, sizeof(tu))) + return -EFAULT; + } restart = ¤t_thread_info()->restart_block; restart->fn = nanosleep_restart; - restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF; - restart->arg1 = timer.expires.tv64 >> 32; + restart->arg0 = t.timer.expires.tv64 & 0xFFFFFFFF; + restart->arg1 = t.timer.expires.tv64 >> 32; restart->arg2 = (unsigned long) rmtp; - restart->arg3 = (unsigned long) timer.base->index; + restart->arg3 = (unsigned long) t.timer.base->index; return -ERESTART_RESTARTBLOCK; } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 6edfcef291e..ac766ad573e 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -271,6 +271,7 @@ void free_irq(unsigned int irq, void *dev_id) struct irqaction **p; unsigned long flags; + WARN_ON(in_interrupt()); if (irq >= NR_IRQS) return; diff --git a/kernel/itimer.c b/kernel/itimer.c index 680e6b70c87..204ed7939e7 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -128,16 +128,16 @@ asmlinkage long sys_getitimer(int which, struct itimerval __user *value) /* * The timer is automagically restarted, when interval != 0 */ -int it_real_fn(void *data) +int it_real_fn(struct hrtimer *timer) { - struct task_struct *tsk = (struct task_struct *) data; + struct signal_struct *sig = + container_of(timer, struct signal_struct, real_timer); - send_group_sig_info(SIGALRM, SEND_SIG_PRIV, tsk); - - if (tsk->signal->it_real_incr.tv64 != 0) { - hrtimer_forward(&tsk->signal->real_timer, - tsk->signal->it_real_incr); + send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk); + if (sig->it_real_incr.tv64 != 0) { + hrtimer_forward(timer, timer->base->softirq_time, + sig->it_real_incr); return HRTIMER_RESTART; } return HRTIMER_NORESTART; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1fb9f753ef6..1156eb0977d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -323,10 +323,10 @@ struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk) } /* - * This function is called from exit_thread or flush_thread when task tk's - * stack is being recycled so that we can recycle any 
function-return probe - * instances associated with this task. These left over instances represent - * probed functions that have been called but will never return. + * This function is called from finish_task_switch when task tk becomes dead, + * so that we can recycle any function-return probe instances associated + * with this task. These left over instances represent probed functions + * that have been called but will never return. */ void __kprobes kprobe_flush_task(struct task_struct *tk) { @@ -336,7 +336,7 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) unsigned long flags = 0; spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + head = kretprobe_inst_table_head(tk); hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { if (ri->task == tk) recycle_rp_inst(ri); diff --git a/kernel/module.c b/kernel/module.c index ddfe45ac2fd..bd088a7c149 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -64,26 +64,17 @@ static DEFINE_SPINLOCK(modlist_lock); static DEFINE_MUTEX(module_mutex); static LIST_HEAD(modules); -static DEFINE_MUTEX(notify_mutex); -static struct notifier_block * module_notify_list; +static BLOCKING_NOTIFIER_HEAD(module_notify_list); int register_module_notifier(struct notifier_block * nb) { - int err; - mutex_lock(¬ify_mutex); - err = notifier_chain_register(&module_notify_list, nb); - mutex_unlock(¬ify_mutex); - return err; + return blocking_notifier_chain_register(&module_notify_list, nb); } EXPORT_SYMBOL(register_module_notifier); int unregister_module_notifier(struct notifier_block * nb) { - int err; - mutex_lock(¬ify_mutex); - err = notifier_chain_unregister(&module_notify_list, nb); - mutex_unlock(¬ify_mutex); - return err; + return blocking_notifier_chain_unregister(&module_notify_list, nb); } EXPORT_SYMBOL(unregister_module_notifier); @@ -136,7 +127,7 @@ extern const unsigned long __start___kcrctab_gpl_future[]; #ifndef CONFIG_MODVERSIONS #define symversion(base, idx) NULL #else -#define symversion(base, idx) ((base) ? ((base) + (idx)) : NULL) +#define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL) #endif /* lookup symbol in given range of kernel_symbols */ @@ -1816,9 +1807,8 @@ sys_init_module(void __user *umod, /* Drop lock so they can recurse */ mutex_unlock(&module_mutex); - mutex_lock(¬ify_mutex); - notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); - mutex_unlock(¬ify_mutex); + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_COMING, mod); /* Start the module */ if (mod->init != NULL) diff --git a/kernel/panic.c b/kernel/panic.c index acd95adddb9..f895c7c01d5 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -29,7 +29,7 @@ static DEFINE_SPINLOCK(pause_on_oops_lock); int panic_timeout; EXPORT_SYMBOL(panic_timeout); -struct notifier_block *panic_notifier_list; +ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); @@ -97,7 +97,7 @@ NORET_TYPE void panic(const char * fmt, ...) smp_send_stop(); #endif - notifier_call_chain(&panic_notifier_list, 0, buf); + atomic_notifier_call_chain(&panic_notifier_list, 0, buf); if (!panic_blink) panic_blink = no_blink; diff --git a/kernel/params.c b/kernel/params.c index 9de637a5c8b..af43ecdc8d9 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -31,7 +31,7 @@ #define DEBUGP(fmt, a...) 
#endif -static inline int dash2underscore(char c) +static inline char dash2underscore(char c) { if (c == '-') return '_'; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 9944379360b..ac6dc874442 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -145,7 +145,7 @@ static int common_timer_set(struct k_itimer *, int, struct itimerspec *, struct itimerspec *); static int common_timer_del(struct k_itimer *timer); -static int posix_timer_fn(void *data); +static int posix_timer_fn(struct hrtimer *data); static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); @@ -251,15 +251,18 @@ __initcall(init_posix_timers); static void schedule_next_timer(struct k_itimer *timr) { + struct hrtimer *timer = &timr->it.real.timer; + if (timr->it.real.interval.tv64 == 0) return; - timr->it_overrun += hrtimer_forward(&timr->it.real.timer, + timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(), timr->it.real.interval); + timr->it_overrun_last = timr->it_overrun; timr->it_overrun = -1; ++timr->it_requeue_pending; - hrtimer_restart(&timr->it.real.timer); + hrtimer_restart(timer); } /* @@ -331,13 +334,14 @@ EXPORT_SYMBOL_GPL(posix_timer_event); * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. */ -static int posix_timer_fn(void *data) +static int posix_timer_fn(struct hrtimer *timer) { - struct k_itimer *timr = data; + struct k_itimer *timr; unsigned long flags; int si_private = 0; int ret = HRTIMER_NORESTART; + timr = container_of(timer, struct k_itimer, it.real.timer); spin_lock_irqsave(&timr->it_lock, flags); if (timr->it.real.interval.tv64 != 0) @@ -351,7 +355,8 @@ static int posix_timer_fn(void *data) */ if (timr->it.real.interval.tv64 != 0) { timr->it_overrun += - hrtimer_forward(&timr->it.real.timer, + hrtimer_forward(timer, + timer->base->softirq_time, timr->it.real.interval); ret = HRTIMER_RESTART; ++timr->it_requeue_pending; @@ -603,38 +608,41 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags) static void common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) { - ktime_t remaining; + ktime_t now, remaining, iv; struct hrtimer *timer = &timr->it.real.timer; memset(cur_setting, 0, sizeof(struct itimerspec)); - remaining = hrtimer_get_remaining(timer); - /* Time left ? or timer pending */ - if (remaining.tv64 > 0 || hrtimer_active(timer)) - goto calci; + iv = timr->it.real.interval; + /* interval timer ? */ - if (timr->it.real.interval.tv64 == 0) + if (iv.tv64) + cur_setting->it_interval = ktime_to_timespec(iv); + else if (!hrtimer_active(timer) && + (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) return; + + now = timer->base->get_time(); + /* - * When a requeue is pending or this is a SIGEV_NONE timer - * move the expiry time forward by intervals, so expiry is > - * now. + * When a requeue is pending or this is a SIGEV_NONE + * timer move the expiry time forward by intervals, so + * expiry is > now. */ - if (timr->it_requeue_pending & REQUEUE_PENDING || - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { - timr->it_overrun += - hrtimer_forward(timer, timr->it.real.interval); - remaining = hrtimer_get_remaining(timer); - } - calci: - /* interval timer ? 
*/ - if (timr->it.real.interval.tv64 != 0) - cur_setting->it_interval = - ktime_to_timespec(timr->it.real.interval); + if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING || + (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) + timr->it_overrun += hrtimer_forward(timer, now, iv); + + remaining = ktime_sub(timer->expires, now); /* Return 0 only, when the timer is expired and not pending */ - if (remaining.tv64 <= 0) - cur_setting->it_value.tv_nsec = 1; - else + if (remaining.tv64 <= 0) { + /* + * A single shot SIGEV_NONE timer must return 0, when + * it is expired ! + */ + if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) + cur_setting->it_value.tv_nsec = 1; + } else cur_setting->it_value = ktime_to_timespec(remaining); } @@ -717,7 +725,6 @@ common_timer_set(struct k_itimer *timr, int flags, mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL; hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); - timr->it.real.timer.data = timr; timr->it.real.timer.function = posix_timer_fn; timer->expires = timespec_to_ktime(new_setting->it_value); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 9177f3f73a6..044b8e0c102 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -454,10 +454,11 @@ static int load_image(struct swap_map_handle *handle, nr_pages++; } } while (ret > 0); - if (!error) + if (!error) { printk("\b\b\b\bdone\n"); - if (!snapshot_image_loaded(snapshot)) - error = -ENODATA; + if (!snapshot_image_loaded(snapshot)) + error = -ENODATA; + } return error; } diff --git a/kernel/profile.c b/kernel/profile.c index ad81f799a9b..5a730fdb1a2 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -87,72 +87,52 @@ void __init profile_init(void) #ifdef CONFIG_PROFILING -static DECLARE_RWSEM(profile_rwsem); -static DEFINE_RWLOCK(handoff_lock); -static struct notifier_block * task_exit_notifier; -static struct notifier_block * task_free_notifier; -static struct notifier_block * munmap_notifier; +static BLOCKING_NOTIFIER_HEAD(task_exit_notifier); +static ATOMIC_NOTIFIER_HEAD(task_free_notifier); +static BLOCKING_NOTIFIER_HEAD(munmap_notifier); void profile_task_exit(struct task_struct * task) { - down_read(&profile_rwsem); - notifier_call_chain(&task_exit_notifier, 0, task); - up_read(&profile_rwsem); + blocking_notifier_call_chain(&task_exit_notifier, 0, task); } int profile_handoff_task(struct task_struct * task) { int ret; - read_lock(&handoff_lock); - ret = notifier_call_chain(&task_free_notifier, 0, task); - read_unlock(&handoff_lock); + ret = atomic_notifier_call_chain(&task_free_notifier, 0, task); return (ret == NOTIFY_OK) ? 
1 : 0; } void profile_munmap(unsigned long addr) { - down_read(&profile_rwsem); - notifier_call_chain(&munmap_notifier, 0, (void *)addr); - up_read(&profile_rwsem); + blocking_notifier_call_chain(&munmap_notifier, 0, (void *)addr); } int task_handoff_register(struct notifier_block * n) { - int err = -EINVAL; - - write_lock(&handoff_lock); - err = notifier_chain_register(&task_free_notifier, n); - write_unlock(&handoff_lock); - return err; + return atomic_notifier_chain_register(&task_free_notifier, n); } int task_handoff_unregister(struct notifier_block * n) { - int err = -EINVAL; - - write_lock(&handoff_lock); - err = notifier_chain_unregister(&task_free_notifier, n); - write_unlock(&handoff_lock); - return err; + return atomic_notifier_chain_unregister(&task_free_notifier, n); } int profile_event_register(enum profile_type type, struct notifier_block * n) { int err = -EINVAL; - down_write(&profile_rwsem); - switch (type) { case PROFILE_TASK_EXIT: - err = notifier_chain_register(&task_exit_notifier, n); + err = blocking_notifier_chain_register( + &task_exit_notifier, n); break; case PROFILE_MUNMAP: - err = notifier_chain_register(&munmap_notifier, n); + err = blocking_notifier_chain_register( + &munmap_notifier, n); break; } - up_write(&profile_rwsem); - return err; } @@ -161,18 +141,17 @@ int profile_event_unregister(enum profile_type type, struct notifier_block * n) { int err = -EINVAL; - down_write(&profile_rwsem); - switch (type) { case PROFILE_TASK_EXIT: - err = notifier_chain_unregister(&task_exit_notifier, n); + err = blocking_notifier_chain_unregister( + &task_exit_notifier, n); break; case PROFILE_MUNMAP: - err = notifier_chain_unregister(&munmap_notifier, n); + err = blocking_notifier_chain_unregister( + &munmap_notifier, n); break; } - up_write(&profile_rwsem); return err; } diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index b4b362b5baf..8154e7589d1 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -301,7 +301,7 @@ rcu_torture_printk(char *page) long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { pipesummary[i] += per_cpu(rcu_torture_count, cpu)[i]; batchsummary[i] += per_cpu(rcu_torture_batch, cpu)[i]; @@ -535,7 +535,7 @@ rcu_torture_init(void) atomic_set(&n_rcu_torture_error, 0); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) atomic_set(&rcu_torture_wcount[i], 0); - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { per_cpu(rcu_torture_count, cpu)[i] = 0; per_cpu(rcu_torture_batch, cpu)[i] = 0; diff --git a/kernel/sched.c b/kernel/sched.c index 7ffaabd64f8..a9ecac398bb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -49,6 +49,7 @@ #include <linux/syscalls.h> #include <linux/times.h> #include <linux/acct.h> +#include <linux/kprobes.h> #include <asm/tlb.h> #include <asm/unistd.h> @@ -144,7 +145,8 @@ (v1) * (v2_max) / (v1_max) #define DELTA(p) \ - (SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA) + (SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \ + INTERACTIVE_DELTA) #define TASK_INTERACTIVE(p) \ ((p)->prio <= (p)->static_prio - DELTA(p)) @@ -1546,8 +1548,14 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev) finish_lock_switch(rq, prev); if (mm) mmdrop(mm); - if (unlikely(prev_task_flags & PF_DEAD)) + if (unlikely(prev_task_flags & PF_DEAD)) { + /* + * Remove function-return probe instances associated with 
this + * task and put them back on the free list. + */ + kprobe_flush_task(prev); put_task_struct(prev); + } } /** @@ -1617,7 +1625,7 @@ unsigned long nr_uninterruptible(void) { unsigned long i, sum = 0; - for_each_cpu(i) + for_each_possible_cpu(i) sum += cpu_rq(i)->nr_uninterruptible; /* @@ -1634,7 +1642,7 @@ unsigned long long nr_context_switches(void) { unsigned long long i, sum = 0; - for_each_cpu(i) + for_each_possible_cpu(i) sum += cpu_rq(i)->nr_switches; return sum; @@ -1644,7 +1652,7 @@ unsigned long nr_iowait(void) { unsigned long i, sum = 0; - for_each_cpu(i) + for_each_possible_cpu(i) sum += atomic_read(&cpu_rq(i)->nr_iowait); return sum; @@ -2871,13 +2879,11 @@ asmlinkage void __sched schedule(void) * schedule() atomically, we ignore that path for now. * Otherwise, whine if we are scheduling when we should not be. */ - if (likely(!current->exit_state)) { - if (unlikely(in_atomic())) { - printk(KERN_ERR "BUG: scheduling while atomic: " - "%s/0x%08x/%d\n", - current->comm, preempt_count(), current->pid); - dump_stack(); - } + if (unlikely(in_atomic() && !current->exit_state)) { + printk(KERN_ERR "BUG: scheduling while atomic: " + "%s/0x%08x/%d\n", + current->comm, preempt_count(), current->pid); + dump_stack(); } profile_hit(SCHED_PROFILING, __builtin_return_address(0)); @@ -5568,11 +5574,31 @@ static int cpu_to_cpu_group(int cpu) } #endif +#ifdef CONFIG_SCHED_MC +static DEFINE_PER_CPU(struct sched_domain, core_domains); +static struct sched_group sched_group_core[NR_CPUS]; +#endif + +#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) +static int cpu_to_core_group(int cpu) +{ + return first_cpu(cpu_sibling_map[cpu]); +} +#elif defined(CONFIG_SCHED_MC) +static int cpu_to_core_group(int cpu) +{ + return cpu; +} +#endif + static DEFINE_PER_CPU(struct sched_domain, phys_domains); static struct sched_group sched_group_phys[NR_CPUS]; static int cpu_to_phys_group(int cpu) { -#ifdef CONFIG_SCHED_SMT +#if defined(CONFIG_SCHED_MC) + cpumask_t mask = cpu_coregroup_map(cpu); + return first_cpu(mask); +#elif defined(CONFIG_SCHED_SMT) return first_cpu(cpu_sibling_map[cpu]); #else return cpu; @@ -5595,6 +5621,32 @@ static int cpu_to_allnodes_group(int cpu) { return cpu_to_node(cpu); } +static void init_numa_sched_groups_power(struct sched_group *group_head) +{ + struct sched_group *sg = group_head; + int j; + + if (!sg) + return; +next_sg: + for_each_cpu_mask(j, sg->cpumask) { + struct sched_domain *sd; + + sd = &per_cpu(phys_domains, j); + if (j != first_cpu(sd->groups->cpumask)) { + /* + * Only add "power" once for each + * physical package. 
+ */ + continue; + } + + sg->cpu_power += sd->groups->cpu_power; + } + sg = sg->next; + if (sg != group_head) + goto next_sg; +} #endif /* @@ -5670,6 +5722,17 @@ void build_sched_domains(const cpumask_t *cpu_map) sd->parent = p; sd->groups = &sched_group_phys[group]; +#ifdef CONFIG_SCHED_MC + p = sd; + sd = &per_cpu(core_domains, i); + group = cpu_to_core_group(i); + *sd = SD_MC_INIT; + sd->span = cpu_coregroup_map(i); + cpus_and(sd->span, sd->span, *cpu_map); + sd->parent = p; + sd->groups = &sched_group_core[group]; +#endif + #ifdef CONFIG_SCHED_SMT p = sd; sd = &per_cpu(cpu_domains, i); @@ -5695,6 +5758,19 @@ void build_sched_domains(const cpumask_t *cpu_map) } #endif +#ifdef CONFIG_SCHED_MC + /* Set up multi-core groups */ + for_each_cpu_mask(i, *cpu_map) { + cpumask_t this_core_map = cpu_coregroup_map(i); + cpus_and(this_core_map, this_core_map, *cpu_map); + if (i != first_cpu(this_core_map)) + continue; + init_sched_build_groups(sched_group_core, this_core_map, + &cpu_to_core_group); + } +#endif + + /* Set up physical groups */ for (i = 0; i < MAX_NUMNODES; i++) { cpumask_t nodemask = node_to_cpumask(i); @@ -5791,51 +5867,38 @@ void build_sched_domains(const cpumask_t *cpu_map) power = SCHED_LOAD_SCALE; sd->groups->cpu_power = power; #endif +#ifdef CONFIG_SCHED_MC + sd = &per_cpu(core_domains, i); + power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1) + * SCHED_LOAD_SCALE / 10; + sd->groups->cpu_power = power; + + sd = &per_cpu(phys_domains, i); + /* + * This has to be < 2 * SCHED_LOAD_SCALE + * Lets keep it SCHED_LOAD_SCALE, so that + * while calculating NUMA group's cpu_power + * we can simply do + * numa_group->cpu_power += phys_group->cpu_power; + * + * See "only add power once for each physical pkg" + * comment below + */ + sd->groups->cpu_power = SCHED_LOAD_SCALE; +#else sd = &per_cpu(phys_domains, i); power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * (cpus_weight(sd->groups->cpumask)-1) / 10; sd->groups->cpu_power = power; - -#ifdef CONFIG_NUMA - sd = &per_cpu(allnodes_domains, i); - if (sd->groups) { - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - sd->groups->cpu_power = power; - } #endif } #ifdef CONFIG_NUMA - for (i = 0; i < MAX_NUMNODES; i++) { - struct sched_group *sg = sched_group_nodes[i]; - int j; - - if (sg == NULL) - continue; -next_sg: - for_each_cpu_mask(j, sg->cpumask) { - struct sched_domain *sd; - int power; + for (i = 0; i < MAX_NUMNODES; i++) + init_numa_sched_groups_power(sched_group_nodes[i]); - sd = &per_cpu(phys_domains, j); - if (j != first_cpu(sd->groups->cpumask)) { - /* - * Only add "power" once for each - * physical package. 
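[Note: the cpu_power initialisation above is easier to see with numbers. SCHED_LOAD_SCALE is 128UL in this kernel, and each additional CPU sharing a domain contributes roughly 10% of the scale, so a 2-thread SMT or dual-core group gets 140 and a 4-CPU group 166. A standalone sketch of the arithmetic (ordinary userspace C, not kernel code):

#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL	/* value from include/linux/sched.h */

/* Group power as computed in build_sched_domains(): full scale for
 * the first CPU plus ~10% of the scale per additional CPU. */
static unsigned long group_power(unsigned long ncpus)
{
	return SCHED_LOAD_SCALE + (ncpus - 1) * SCHED_LOAD_SCALE / 10;
}

int main(void)
{
	printf("%lu %lu %lu\n",
	       group_power(1),	/* 128 */
	       group_power(2),	/* 140 */
	       group_power(4));	/* 166 */
	return 0;
}

End of note.]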
- */ - continue; - } - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - - sg->cpu_power += power; - } - sg = sg->next; - if (sg != sched_group_nodes[i]) - goto next_sg; - } + init_numa_sched_groups_power(sched_group_allnodes); #endif /* Attach the domains */ @@ -5843,6 +5906,8 @@ next_sg: struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT sd = &per_cpu(cpu_domains, i); +#elif defined(CONFIG_SCHED_MC) + sd = &per_cpu(core_domains, i); #else sd = &per_cpu(phys_domains, i); #endif @@ -6015,7 +6080,7 @@ void __init sched_init(void) runqueue_t *rq; int i, j, k; - for_each_cpu(i) { + for_each_possible_cpu(i) { prio_array_t *array; rq = cpu_rq(i); diff --git a/kernel/softlockup.c b/kernel/softlockup.c index d9b3d5847ed..ced91e1ff56 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -152,5 +152,5 @@ __init void spawn_softlockup_task(void) cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); register_cpu_notifier(&cpu_nfb); - notifier_chain_register(&panic_notifier_list, &panic_block); + atomic_notifier_chain_register(&panic_notifier_list, &panic_block); } diff --git a/kernel/sys.c b/kernel/sys.c index 38bc73ede2b..c93d37f71ae 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -95,99 +95,304 @@ int cad_pid = 1; * and the like. */ -static struct notifier_block *reboot_notifier_list; -static DEFINE_RWLOCK(notifier_lock); +static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list); + +/* + * Notifier chain core routines. The exported routines below + * are layered on top of these, with appropriate locking added. + */ + +static int notifier_chain_register(struct notifier_block **nl, + struct notifier_block *n) +{ + while ((*nl) != NULL) { + if (n->priority > (*nl)->priority) + break; + nl = &((*nl)->next); + } + n->next = *nl; + rcu_assign_pointer(*nl, n); + return 0; +} + +static int notifier_chain_unregister(struct notifier_block **nl, + struct notifier_block *n) +{ + while ((*nl) != NULL) { + if ((*nl) == n) { + rcu_assign_pointer(*nl, n->next); + return 0; + } + nl = &((*nl)->next); + } + return -ENOENT; +} + +static int __kprobes notifier_call_chain(struct notifier_block **nl, + unsigned long val, void *v) +{ + int ret = NOTIFY_DONE; + struct notifier_block *nb; + + nb = rcu_dereference(*nl); + while (nb) { + ret = nb->notifier_call(nb, val, v); + if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) + break; + nb = rcu_dereference(nb->next); + } + return ret; +} + +/* + * Atomic notifier chain routines. Registration and unregistration + * use a mutex, and call_chain is synchronized by RCU (no locks). + */ /** - * notifier_chain_register - Add notifier to a notifier chain - * @list: Pointer to root list pointer + * atomic_notifier_chain_register - Add notifier to an atomic notifier chain + * @nh: Pointer to head of the atomic notifier chain * @n: New entry in notifier chain * - * Adds a notifier to a notifier chain. + * Adds a notifier to an atomic notifier chain. * * Currently always returns zero. 
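[Note: the softlockup.c hunk above is the minimal client-side conversion for an atomic chain. Spelled out as a self-contained sketch registering on the panic chain that panic.c converts earlier in this patch; the callback body and the example_* names are hypothetical:

#include <linux/notifier.h>
#include <linux/kernel.h>	/* declares panic_notifier_list after this change */
#include <linux/init.h>

/* Hypothetical panic hook: atomic-chain callbacks run under
 * rcu_read_lock() and must not block. */
static int example_panic_event(struct notifier_block *nb,
			       unsigned long event, void *buf)
{
	printk(KERN_EMERG "example: panic: %s\n", (char *)buf);
	return NOTIFY_DONE;
}

static struct notifier_block example_panic_block = {
	.notifier_call = example_panic_event,
};

static int __init example_init(void)
{
	/* panic_notifier_list is the ATOMIC_NOTIFIER_HEAD from panic.c */
	atomic_notifier_chain_register(&panic_notifier_list,
				       &example_panic_block);
	return 0;
}

End of note.]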
*/ + +int atomic_notifier_chain_register(struct atomic_notifier_head *nh, + struct notifier_block *n) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&nh->lock, flags); + ret = notifier_chain_register(&nh->head, n); + spin_unlock_irqrestore(&nh->lock, flags); + return ret; +} + +EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); + +/** + * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain + * @nh: Pointer to head of the atomic notifier chain + * @n: Entry to remove from notifier chain + * + * Removes a notifier from an atomic notifier chain. + * + * Returns zero on success or %-ENOENT on failure. + */ +int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, + struct notifier_block *n) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&nh->lock, flags); + ret = notifier_chain_unregister(&nh->head, n); + spin_unlock_irqrestore(&nh->lock, flags); + synchronize_rcu(); + return ret; +} + +EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); + +/** + * atomic_notifier_call_chain - Call functions in an atomic notifier chain + * @nh: Pointer to head of the atomic notifier chain + * @val: Value passed unmodified to notifier function + * @v: Pointer passed unmodified to notifier function + * + * Calls each function in a notifier chain in turn. The functions + * run in an atomic context, so they must not block. + * This routine uses RCU to synchronize with changes to the chain. + * + * If the return value of the notifier can be and'ed + * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain + * will return immediately, with the return value of + * the notifier function which halted execution. + * Otherwise the return value is the return value + * of the last notifier function called. + */ -int notifier_chain_register(struct notifier_block **list, struct notifier_block *n) +int atomic_notifier_call_chain(struct atomic_notifier_head *nh, + unsigned long val, void *v) { - write_lock(¬ifier_lock); - while(*list) - { - if(n->priority > (*list)->priority) - break; - list= &((*list)->next); - } - n->next = *list; - *list=n; - write_unlock(¬ifier_lock); - return 0; + int ret; + + rcu_read_lock(); + ret = notifier_call_chain(&nh->head, val, v); + rcu_read_unlock(); + return ret; } -EXPORT_SYMBOL(notifier_chain_register); +EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); + +/* + * Blocking notifier chain routines. All access to the chain is + * synchronized by an rwsem. + */ /** - * notifier_chain_unregister - Remove notifier from a notifier chain - * @nl: Pointer to root list pointer + * blocking_notifier_chain_register - Add notifier to a blocking notifier chain + * @nh: Pointer to head of the blocking notifier chain * @n: New entry in notifier chain * - * Removes a notifier from a notifier chain. + * Adds a notifier to a blocking notifier chain. + * Must be called in process context. * - * Returns zero on success, or %-ENOENT on failure. + * Currently always returns zero. */ -int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n) +int blocking_notifier_chain_register(struct blocking_notifier_head *nh, + struct notifier_block *n) { - write_lock(¬ifier_lock); - while((*nl)!=NULL) - { - if((*nl)==n) - { - *nl=n->next; - write_unlock(¬ifier_lock); - return 0; - } - nl=&((*nl)->next); - } - write_unlock(¬ifier_lock); - return -ENOENT; + int ret; + + /* + * This code gets used during boot-up, when task switching is + * not yet working and interrupts must remain disabled. At + * such times we must not call down_write(). 
+ */ + if (unlikely(system_state == SYSTEM_BOOTING)) + return notifier_chain_register(&nh->head, n); + + down_write(&nh->rwsem); + ret = notifier_chain_register(&nh->head, n); + up_write(&nh->rwsem); + return ret; } -EXPORT_SYMBOL(notifier_chain_unregister); +EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); /** - * notifier_call_chain - Call functions in a notifier chain - * @n: Pointer to root pointer of notifier chain + * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain + * @nh: Pointer to head of the blocking notifier chain + * @n: Entry to remove from notifier chain + * + * Removes a notifier from a blocking notifier chain. + * Must be called from process context. + * + * Returns zero on success or %-ENOENT on failure. + */ +int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, + struct notifier_block *n) +{ + int ret; + + /* + * This code gets used during boot-up, when task switching is + * not yet working and interrupts must remain disabled. At + * such times we must not call down_write(). + */ + if (unlikely(system_state == SYSTEM_BOOTING)) + return notifier_chain_unregister(&nh->head, n); + + down_write(&nh->rwsem); + ret = notifier_chain_unregister(&nh->head, n); + up_write(&nh->rwsem); + return ret; +} + +EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); + +/** + * blocking_notifier_call_chain - Call functions in a blocking notifier chain + * @nh: Pointer to head of the blocking notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function * - * Calls each function in a notifier chain in turn. + * Calls each function in a notifier chain in turn. The functions + * run in a process context, so they are allowed to block. * - * If the return value of the notifier can be and'd - * with %NOTIFY_STOP_MASK, then notifier_call_chain + * If the return value of the notifier can be and'ed + * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain * will return immediately, with the return value of * the notifier function which halted execution. - * Otherwise, the return value is the return value + * Otherwise the return value is the return value * of the last notifier function called. */ -int __kprobes notifier_call_chain(struct notifier_block **n, unsigned long val, void *v) +int blocking_notifier_call_chain(struct blocking_notifier_head *nh, + unsigned long val, void *v) { - int ret=NOTIFY_DONE; - struct notifier_block *nb = *n; + int ret; - while(nb) - { - ret=nb->notifier_call(nb,val,v); - if(ret&NOTIFY_STOP_MASK) - { - return ret; - } - nb=nb->next; - } + down_read(&nh->rwsem); + ret = notifier_call_chain(&nh->head, val, v); + up_read(&nh->rwsem); return ret; } -EXPORT_SYMBOL(notifier_call_chain); +EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); + +/* + * Raw notifier chain routines. There is no protection; + * the caller must provide it. Use at your own risk! + */ + +/** + * raw_notifier_chain_register - Add notifier to a raw notifier chain + * @nh: Pointer to head of the raw notifier chain + * @n: New entry in notifier chain + * + * Adds a notifier to a raw notifier chain. + * All locking must be provided by the caller. + * + * Currently always returns zero. 
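[Note: for the raw variant, all serialization is the caller's problem. A hedged sketch of what that means in practice; the example_* names are invented, and the RAW_NOTIFIER_HEAD initializer comes from the include/linux/notifier.h half of this change, which is outside the 'kernel' diffstat shown here:

#include <linux/notifier.h>
#include <linux/spinlock.h>

/* Hypothetical raw-chain user: the chain code takes no locks, so the
 * caller wraps every registration and call in its own lock. */
static RAW_NOTIFIER_HEAD(example_chain);
static DEFINE_SPINLOCK(example_lock);

static void example_notify(unsigned long event, void *v)
{
	spin_lock(&example_lock);
	raw_notifier_call_chain(&example_chain, event, v);
	spin_unlock(&example_lock);
}

static int example_register(struct notifier_block *nb)
{
	int ret;

	spin_lock(&example_lock);
	ret = raw_notifier_chain_register(&example_chain, nb);
	spin_unlock(&example_lock);
	return ret;
}

End of note.]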
+ */ + +int raw_notifier_chain_register(struct raw_notifier_head *nh, + struct notifier_block *n) +{ + return notifier_chain_register(&nh->head, n); +} + +EXPORT_SYMBOL_GPL(raw_notifier_chain_register); + +/** + * raw_notifier_chain_unregister - Remove notifier from a raw notifier chain + * @nh: Pointer to head of the raw notifier chain + * @n: Entry to remove from notifier chain + * + * Removes a notifier from a raw notifier chain. + * All locking must be provided by the caller. + * + * Returns zero on success or %-ENOENT on failure. + */ +int raw_notifier_chain_unregister(struct raw_notifier_head *nh, + struct notifier_block *n) +{ + return notifier_chain_unregister(&nh->head, n); +} + +EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); + +/** + * raw_notifier_call_chain - Call functions in a raw notifier chain + * @nh: Pointer to head of the raw notifier chain + * @val: Value passed unmodified to notifier function + * @v: Pointer passed unmodified to notifier function + * + * Calls each function in a notifier chain in turn. The functions + * run in an undefined context. + * All locking must be provided by the caller. + * + * If the return value of the notifier can be and'ed + * with %NOTIFY_STOP_MASK then raw_notifier_call_chain + * will return immediately, with the return value of + * the notifier function which halted execution. + * Otherwise the return value is the return value + * of the last notifier function called. + */ + +int raw_notifier_call_chain(struct raw_notifier_head *nh, + unsigned long val, void *v) +{ + return notifier_call_chain(&nh->head, val, v); +} + +EXPORT_SYMBOL_GPL(raw_notifier_call_chain); /** * register_reboot_notifier - Register function to be called at reboot time @@ -196,13 +401,13 @@ EXPORT_SYMBOL(notifier_call_chain); * Registers a function with the list of functions * to be called at reboot time. * - * Currently always returns zero, as notifier_chain_register + * Currently always returns zero, as blocking_notifier_chain_register * always returns zero. 
*/ int register_reboot_notifier(struct notifier_block * nb) { - return notifier_chain_register(&reboot_notifier_list, nb); + return blocking_notifier_chain_register(&reboot_notifier_list, nb); } EXPORT_SYMBOL(register_reboot_notifier); @@ -219,7 +424,7 @@ EXPORT_SYMBOL(register_reboot_notifier); int unregister_reboot_notifier(struct notifier_block * nb) { - return notifier_chain_unregister(&reboot_notifier_list, nb); + return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); } EXPORT_SYMBOL(unregister_reboot_notifier); @@ -380,7 +585,7 @@ EXPORT_SYMBOL_GPL(emergency_restart); void kernel_restart_prepare(char *cmd) { - notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); + blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; device_shutdown(); } @@ -430,7 +635,7 @@ EXPORT_SYMBOL_GPL(kernel_kexec); void kernel_shutdown_prepare(enum system_states state) { - notifier_call_chain(&reboot_notifier_list, + blocking_notifier_call_chain(&reboot_notifier_list, (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); system_state = state; device_shutdown(); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 1067090db6b..d82864c4a61 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -42,6 +42,10 @@ cond_syscall(sys_recvmsg); cond_syscall(sys_socketcall); cond_syscall(sys_futex); cond_syscall(compat_sys_futex); +cond_syscall(sys_set_robust_list); +cond_syscall(compat_sys_set_robust_list); +cond_syscall(sys_get_robust_list); +cond_syscall(compat_sys_get_robust_list); cond_syscall(sys_epoll_create); cond_syscall(sys_epoll_ctl); cond_syscall(sys_epoll_wait); diff --git a/kernel/time.c b/kernel/time.c index e00a97b7724..ff8e7019c4c 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -610,7 +610,7 @@ void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) * * Returns the timespec representation of the nsec parameter. */ -struct timespec ns_to_timespec(const nsec_t nsec) +struct timespec ns_to_timespec(const s64 nsec) { struct timespec ts; @@ -630,7 +630,7 @@ struct timespec ns_to_timespec(const nsec_t nsec) * * Returns the timeval representation of the nsec parameter. */ -struct timeval ns_to_timeval(const nsec_t nsec) +struct timeval ns_to_timeval(const s64 nsec) { struct timespec ts = ns_to_timespec(nsec); struct timeval tv;
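[Closing note: one pattern in this patch worth calling out once more is the hrtimer callback change. Handlers now receive the struct hrtimer itself and recover their context with container_of(), replacing the old void *data field; see sleep_hrtimer/nanosleep_wakeup and it_real_fn above. A minimal sketch of the new convention, with hypothetical example_* names:

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/kernel.h>
#include <linux/sched.h>

struct example_timeout {
	struct hrtimer timer;		/* must be embedded for container_of() */
	struct task_struct *task;
};

static int example_timeout_fn(struct hrtimer *timer)
{
	struct example_timeout *t =
		container_of(timer, struct example_timeout, timer);

	wake_up_process(t->task);
	return HRTIMER_NORESTART;	/* one-shot: do not re-enqueue */
}

static void example_arm(struct example_timeout *t, ktime_t expires)
{
	hrtimer_init(&t->timer, CLOCK_MONOTONIC, HRTIMER_REL);
	t->timer.function = example_timeout_fn;
	t->task = current;
	hrtimer_start(&t->timer, expires, HRTIMER_REL);
}

Embedding the hrtimer and using container_of() is what lets run_hrtimer_queue() drop the separate data pointer entirely. End of note.]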