diff options
Diffstat (limited to 'kernel')
43 files changed, 1742 insertions, 1692 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 188c43223f5..1c9938addb9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ - notifier.o ksysfs.o pm_qos_params.o + notifier.o ksysfs.o pm_qos_params.o sched_clock.o obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b9d467d83fc..fbc6fc8949b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -575,7 +575,7 @@ static struct inode_operations cgroup_dir_inode_operations; static struct file_operations proc_cgroupstats_operations; static struct backing_dev_info cgroup_backing_dev_info = { - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) diff --git a/kernel/compat.c b/kernel/compat.c index e1ef04870c2..32c254a8ab9 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -898,7 +898,7 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat current->state = TASK_INTERRUPTIBLE; schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); return -ERESTARTNOHAND; } #endif /* __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND */ @@ -955,7 +955,8 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) __put_user(txc.jitcnt, &utp->jitcnt) || __put_user(txc.calcnt, &utp->calcnt) || __put_user(txc.errcnt, &utp->errcnt) || - __put_user(txc.stbcnt, &utp->stbcnt)) + __put_user(txc.stbcnt, &utp->stbcnt) || + __put_user(txc.tai, &utp->tai)) ret = -EFAULT; return ret; @@ -1080,4 +1081,3 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info) return 0; } - diff --git a/kernel/cpu.c b/kernel/cpu.c index a98f6ab16ec..c77bc3a1c72 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -215,7 +215,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL); printk("%s: attempt to take down CPU %u failed\n", - __FUNCTION__, cpu); + __func__, cpu); err = -EINVAL; goto out_release; } @@ -295,7 +295,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) if (ret == NOTIFY_BAD) { nr_calls--; printk("%s: attempt to bring up CPU %u failed\n", - __FUNCTION__, cpu); + __func__, cpu); ret = -EINVAL; goto out_notify; } diff --git a/kernel/exit.c b/kernel/exit.c index ae0f2c4e452..1510f78a0ff 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -19,6 +19,7 @@ #include <linux/acct.h> #include <linux/tsacct_kern.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/binfmts.h> #include <linux/nsproxy.h> #include <linux/pid_namespace.h> @@ -52,6 +53,11 @@ static void exit_mm(struct task_struct * tsk); +static inline int task_detached(struct task_struct *p) +{ + return p->exit_signal == -1; +} + static void __unhash_process(struct task_struct *p) { nr_threads--; @@ -160,7 +166,7 @@ repeat: zap_leader = 0; leader = p->group_leader; if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { - BUG_ON(leader->exit_signal == -1); + BUG_ON(task_detached(leader)); do_notify_parent(leader, leader->exit_signal); /* * If we were the last child thread and the leader has @@ -170,7 +176,7 @@ repeat: * do_notify_parent() will have marked it self-reaping in * that case. */ - zap_leader = (leader->exit_signal == -1); + zap_leader = task_detached(leader); } write_unlock_irq(&tasklist_lock); @@ -329,13 +335,11 @@ void __set_special_pids(struct pid *pid) pid_t nr = pid_nr(pid); if (task_session(curr) != pid) { - detach_pid(curr, PIDTYPE_SID); - attach_pid(curr, PIDTYPE_SID, pid); + change_pid(curr, PIDTYPE_SID, pid); set_task_session(curr, nr); } if (task_pgrp(curr) != pid) { - detach_pid(curr, PIDTYPE_PGID); - attach_pid(curr, PIDTYPE_PGID, pid); + change_pid(curr, PIDTYPE_PGID, pid); set_task_pgrp(curr, nr); } } @@ -693,7 +697,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) if (unlikely(traced)) { /* Preserve ptrace links if someone else is tracing this child. */ list_del_init(&p->ptrace_list); - if (p->parent != p->real_parent) + if (ptrace_reparented(p)) list_add(&p->ptrace_list, &p->real_parent->ptrace_children); } else { /* If this child is being traced, then we're the one tracing it @@ -717,18 +721,18 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) /* If this is a threaded reparent there is no need to * notify anyone anything has happened. */ - if (p->real_parent->group_leader == father->group_leader) + if (same_thread_group(p->real_parent, father)) return; /* We don't want people slaying init. */ - if (p->exit_signal != -1) + if (!task_detached(p)) p->exit_signal = SIGCHLD; /* If we'd notified the old parent about this child's death, * also notify the new parent. */ if (!traced && p->exit_state == EXIT_ZOMBIE && - p->exit_signal != -1 && thread_group_empty(p)) + !task_detached(p) && thread_group_empty(p)) do_notify_parent(p, p->exit_signal); kill_orphaned_pgrp(p, father); @@ -781,18 +785,18 @@ static void forget_original_parent(struct task_struct *father) } else { /* reparent ptraced task to its real parent */ __ptrace_unlink (p); - if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 && + if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) && thread_group_empty(p)) do_notify_parent(p, p->exit_signal); } /* - * if the ptraced child is a zombie with exit_signal == -1 - * we must collect it before we exit, or it will remain - * zombie forever since we prevented it from self-reap itself - * while it was being traced by us, to be able to see it in wait4. + * if the ptraced child is a detached zombie we must collect + * it before we exit, or it will remain zombie forever since + * we prevented it from self-reap itself while it was being + * traced by us, to be able to see it in wait4. */ - if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1)) + if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p))) list_add(&p->ptrace_list, &ptrace_dead); } @@ -849,29 +853,30 @@ static void exit_notify(struct task_struct *tsk, int group_dead) * we have changed execution domain as these two values started * the same after a fork. */ - if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 && + if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) && (tsk->parent_exec_id != tsk->real_parent->self_exec_id || - tsk->self_exec_id != tsk->parent_exec_id) - && !capable(CAP_KILL)) + tsk->self_exec_id != tsk->parent_exec_id) && + !capable(CAP_KILL)) tsk->exit_signal = SIGCHLD; - /* If something other than our normal parent is ptracing us, then * send it a SIGCHLD instead of honoring exit_signal. exit_signal * only has special meaning to our real parent. */ - if (tsk->exit_signal != -1 && thread_group_empty(tsk)) { - int signal = tsk->parent == tsk->real_parent ? tsk->exit_signal : SIGCHLD; + if (!task_detached(tsk) && thread_group_empty(tsk)) { + int signal = ptrace_reparented(tsk) ? + SIGCHLD : tsk->exit_signal; do_notify_parent(tsk, signal); } else if (tsk->ptrace) { do_notify_parent(tsk, SIGCHLD); } state = EXIT_ZOMBIE; - if (tsk->exit_signal == -1 && likely(!tsk->ptrace)) + if (task_detached(tsk) && likely(!tsk->ptrace)) state = EXIT_DEAD; tsk->exit_state = state; + /* mt-exec, de_thread() is waiting for us */ if (thread_group_leader(tsk) && tsk->signal->notify_count < 0 && tsk->signal->group_exit_task) @@ -1115,12 +1120,13 @@ asmlinkage long sys_exit(int error_code) NORET_TYPE void do_group_exit(int exit_code) { + struct signal_struct *sig = current->signal; + BUG_ON(exit_code & 0x80); /* core dumps don't get here */ - if (current->signal->flags & SIGNAL_GROUP_EXIT) - exit_code = current->signal->group_exit_code; + if (signal_group_exit(sig)) + exit_code = sig->group_exit_code; else if (!thread_group_empty(current)) { - struct signal_struct *const sig = current->signal; struct sighand_struct *const sighand = current->sighand; spin_lock_irq(&sighand->siglock); if (signal_group_exit(sig)) @@ -1172,7 +1178,7 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, * Do not consider detached threads that are * not ptraced: */ - if (p->exit_signal == -1 && !p->ptrace) + if (task_detached(p) && !p->ptrace) return 0; /* Wait for all children (clone and not) if __WALL is set; @@ -1262,8 +1268,7 @@ static int wait_task_zombie(struct task_struct *p, int noreap, return 0; } - /* traced means p->ptrace, but not vice versa */ - traced = (p->real_parent != p->parent); + traced = ptrace_reparented(p); if (likely(!traced)) { struct signal_struct *psig; @@ -1364,9 +1369,9 @@ static int wait_task_zombie(struct task_struct *p, int noreap, * If it's still not detached after that, don't release * it now. */ - if (p->exit_signal != -1) { + if (!task_detached(p)) { do_notify_parent(p, p->exit_signal); - if (p->exit_signal != -1) { + if (!task_detached(p)) { p->exit_state = EXIT_ZOMBIE; p = NULL; } diff --git a/kernel/fork.c b/kernel/fork.c index 068ffe00752..933e60ebcca 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -22,6 +22,7 @@ #include <linux/mempolicy.h> #include <linux/sem.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/key.h> #include <linux/binfmts.h> #include <linux/mman.h> @@ -892,7 +893,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->group_exit_code = 0; sig->group_exit_task = NULL; sig->group_stop_count = 0; - sig->curr_target = NULL; + sig->curr_target = tsk; init_sigpending(&sig->shared_pending); INIT_LIST_HEAD(&sig->posix_timers); diff --git a/kernel/futex.c b/kernel/futex.c index e43945e995f..449def8074f 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -104,10 +104,6 @@ struct futex_q { /* Key which the futex is hashed on: */ union futex_key key; - /* For fd, sigio sent using these: */ - int fd; - struct file *filp; - /* Optional priority inheritance state: */ struct futex_pi_state *pi_state; struct task_struct *task; @@ -126,9 +122,6 @@ struct futex_hash_bucket { static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; -/* Futex-fs vfsmount entry: */ -static struct vfsmount *futex_mnt; - /* * Take mm->mmap_sem, when futex is shared */ @@ -610,8 +603,6 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, static void wake_futex(struct futex_q *q) { plist_del(&q->list, &q->list.plist); - if (q->filp) - send_sigio(&q->filp->f_owner, q->fd, POLL_IN); /* * The lock in wake_up_all() is a crucial memory barrier after the * plist_del() and also before assigning to q->lock_ptr. @@ -988,14 +979,10 @@ out: } /* The key must be already stored in q->key. */ -static inline struct futex_hash_bucket * -queue_lock(struct futex_q *q, int fd, struct file *filp) +static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) { struct futex_hash_bucket *hb; - q->fd = fd; - q->filp = filp; - init_waitqueue_head(&q->waiters); get_futex_key_refs(&q->key); @@ -1006,7 +993,7 @@ queue_lock(struct futex_q *q, int fd, struct file *filp) return hb; } -static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) +static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) { int prio; @@ -1041,15 +1028,6 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) * exactly once. They are called with the hashed spinlock held. */ -/* The key must be already stored in q->key. */ -static void queue_me(struct futex_q *q, int fd, struct file *filp) -{ - struct futex_hash_bucket *hb; - - hb = queue_lock(q, fd, filp); - __queue_me(q, hb); -} - /* Return 1 if we were still queued (ie. 0 means we were woken) */ static int unqueue_me(struct futex_q *q) { @@ -1194,7 +1172,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, if (unlikely(ret != 0)) goto out_release_sem; - hb = queue_lock(&q, -1, NULL); + hb = queue_lock(&q); /* * Access the page AFTER the futex is queued. @@ -1238,7 +1216,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, goto out_unlock_release_sem; /* Only actually queue if *uaddr contained val. */ - __queue_me(&q, hb); + queue_me(&q, hb); /* * Now the futex is queued and we have checked the data, we @@ -1266,11 +1244,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, if (!abs_time) schedule(); else { - hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); hrtimer_init_sleeper(&t, current); t.timer.expires = *abs_time; - hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS); + hrtimer_start(&t.timer, t.timer.expires, + HRTIMER_MODE_ABS); if (!hrtimer_active(&t.timer)) t.task = NULL; @@ -1286,6 +1266,8 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, /* Flag if a timeout occured */ rem = (t.task == NULL); + + destroy_hrtimer_on_stack(&t.timer); } } __set_current_state(TASK_RUNNING); @@ -1367,7 +1349,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, if (time) { to = &timeout; - hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME, + HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); to->timer.expires = *time; } @@ -1381,7 +1364,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, goto out_release_sem; retry_unlocked: - hb = queue_lock(&q, -1, NULL); + hb = queue_lock(&q); retry_locked: ret = lock_taken = 0; @@ -1494,7 +1477,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, /* * Only actually queue now that the atomic ops are done: */ - __queue_me(&q, hb); + queue_me(&q, hb); /* * Now the futex is queued and we have checked the data, we @@ -1581,6 +1564,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, unqueue_me_pi(&q); futex_unlock_mm(fshared); + if (to) + destroy_hrtimer_on_stack(&to->timer); return ret != -EINTR ? ret : -ERESTARTNOINTR; out_unlock_release_sem: @@ -1588,6 +1573,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, out_release_sem: futex_unlock_mm(fshared); + if (to) + destroy_hrtimer_on_stack(&to->timer); return ret; uaddr_faulted: @@ -1615,6 +1602,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, if (!ret && (uval != -EFAULT)) goto retry; + if (to) + destroy_hrtimer_on_stack(&to->timer); return ret; } @@ -1735,121 +1724,6 @@ pi_faulted: return ret; } -static int futex_close(struct inode *inode, struct file *filp) -{ - struct futex_q *q = filp->private_data; - - unqueue_me(q); - kfree(q); - - return 0; -} - -/* This is one-shot: once it's gone off you need a new fd */ -static unsigned int futex_poll(struct file *filp, - struct poll_table_struct *wait) -{ - struct futex_q *q = filp->private_data; - int ret = 0; - - poll_wait(filp, &q->waiters, wait); - - /* - * plist_node_empty() is safe here without any lock. - * q->lock_ptr != 0 is not safe, because of ordering against wakeup. - */ - if (plist_node_empty(&q->list)) - ret = POLLIN | POLLRDNORM; - - return ret; -} - -static const struct file_operations futex_fops = { - .release = futex_close, - .poll = futex_poll, -}; - -/* - * Signal allows caller to avoid the race which would occur if they - * set the sigio stuff up afterwards. - */ -static int futex_fd(u32 __user *uaddr, int signal) -{ - struct futex_q *q; - struct file *filp; - int ret, err; - struct rw_semaphore *fshared; - static unsigned long printk_interval; - - if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) { - printk(KERN_WARNING "Process `%s' used FUTEX_FD, which " - "will be removed from the kernel in June 2007\n", - current->comm); - } - - ret = -EINVAL; - if (!valid_signal(signal)) - goto out; - - ret = get_unused_fd(); - if (ret < 0) - goto out; - filp = get_empty_filp(); - if (!filp) { - put_unused_fd(ret); - ret = -ENFILE; - goto out; - } - filp->f_op = &futex_fops; - filp->f_path.mnt = mntget(futex_mnt); - filp->f_path.dentry = dget(futex_mnt->mnt_root); - filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; - - if (signal) { - err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1); - if (err < 0) { - goto error; - } - filp->f_owner.signum = signal; - } - - q = kmalloc(sizeof(*q), GFP_KERNEL); - if (!q) { - err = -ENOMEM; - goto error; - } - q->pi_state = NULL; - - fshared = ¤t->mm->mmap_sem; - down_read(fshared); - err = get_futex_key(uaddr, fshared, &q->key); - - if (unlikely(err != 0)) { - up_read(fshared); - kfree(q); - goto error; - } - - /* - * queue_me() must be called before releasing mmap_sem, because - * key->shared.inode needs to be referenced while holding it. - */ - filp->private_data = q; - - queue_me(q, ret, filp); - up_read(fshared); - - /* Now we map fd to filp, so userspace can access it */ - fd_install(ret, filp); -out: - return ret; -error: - put_unused_fd(ret); - put_filp(filp); - ret = err; - goto out; -} - /* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. @@ -2081,10 +1955,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, case FUTEX_WAKE_BITSET: ret = futex_wake(uaddr, fshared, val, val3); break; - case FUTEX_FD: - /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */ - ret = futex_fd(uaddr, val); - break; case FUTEX_REQUEUE: ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL); break; @@ -2145,19 +2015,6 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); } -static int futexfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) -{ - return get_sb_pseudo(fs_type, "futex", NULL, FUTEXFS_SUPER_MAGIC, mnt); -} - -static struct file_system_type futex_fs_type = { - .name = "futexfs", - .get_sb = futexfs_get_sb, - .kill_sb = kill_anon_super, -}; - static int __init futex_init(void) { u32 curval; @@ -2182,16 +2039,6 @@ static int __init futex_init(void) spin_lock_init(&futex_queues[i].lock); } - i = register_filesystem(&futex_fs_type); - if (i) - return i; - - futex_mnt = kern_mount(&futex_fs_type); - if (IS_ERR(futex_mnt)) { - unregister_filesystem(&futex_fs_type); - return PTR_ERR(futex_mnt); - } - return 0; } __initcall(futex_init); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index dea4c9124ac..421be5fe5cc 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -43,6 +43,7 @@ #include <linux/tick.h> #include <linux/seq_file.h> #include <linux/err.h> +#include <linux/debugobjects.h> #include <asm/uaccess.h> @@ -153,15 +154,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) } /* - * Helper function to check, whether the timer is running the callback - * function - */ -static inline int hrtimer_callback_running(struct hrtimer *timer) -{ - return timer->state & HRTIMER_STATE_CALLBACK; -} - -/* * Functions and macros which are different for UP/SMP systems are kept in a * single place */ @@ -342,6 +334,115 @@ ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) return res; } +#ifdef CONFIG_DEBUG_OBJECTS_TIMERS + +static struct debug_obj_descr hrtimer_debug_descr; + +/* + * fixup_init is called when: + * - an active object is initialized + */ +static int hrtimer_fixup_init(void *addr, enum debug_obj_state state) +{ + struct hrtimer *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + hrtimer_cancel(timer); + debug_object_init(timer, &hrtimer_debug_descr); + return 1; + default: + return 0; + } +} + +/* + * fixup_activate is called when: + * - an active object is activated + * - an unknown object is activated (might be a statically initialized object) + */ +static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state) +{ + switch (state) { + + case ODEBUG_STATE_NOTAVAILABLE: + WARN_ON_ONCE(1); + return 0; + + case ODEBUG_STATE_ACTIVE: + WARN_ON(1); + + default: + return 0; + } +} + +/* + * fixup_free is called when: + * - an active object is freed + */ +static int hrtimer_fixup_free(void *addr, enum debug_obj_state state) +{ + struct hrtimer *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + hrtimer_cancel(timer); + debug_object_free(timer, &hrtimer_debug_descr); + return 1; + default: + return 0; + } +} + +static struct debug_obj_descr hrtimer_debug_descr = { + .name = "hrtimer", + .fixup_init = hrtimer_fixup_init, + .fixup_activate = hrtimer_fixup_activate, + .fixup_free = hrtimer_fixup_free, +}; + +static inline void debug_hrtimer_init(struct hrtimer *timer) +{ + debug_object_init(timer, &hrtimer_debug_descr); +} + +static inline void debug_hrtimer_activate(struct hrtimer *timer) +{ + debug_object_activate(timer, &hrtimer_debug_descr); +} + +static inline void debug_hrtimer_deactivate(struct hrtimer *timer) +{ + debug_object_deactivate(timer, &hrtimer_debug_descr); +} + +static inline void debug_hrtimer_free(struct hrtimer *timer) +{ + debug_object_free(timer, &hrtimer_debug_descr); +} + +static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode); + +void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) +{ + debug_object_init_on_stack(timer, &hrtimer_debug_descr); + __hrtimer_init(timer, clock_id, mode); +} + +void destroy_hrtimer_on_stack(struct hrtimer *timer) +{ + debug_object_free(timer, &hrtimer_debug_descr); +} + +#else +static inline void debug_hrtimer_init(struct hrtimer *timer) { } +static inline void debug_hrtimer_activate(struct hrtimer *timer) { } +static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } +#endif + /* * Check, whether the timer is on the callback pending list */ @@ -567,6 +668,7 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, /* Timer is expired, act upon the callback mode */ switch(timer->cb_mode) { case HRTIMER_CB_IRQSAFE_NO_RESTART: + debug_hrtimer_deactivate(timer); /* * We can call the callback from here. No restart * happens, so no danger of recursion @@ -581,6 +683,7 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, * the tick timer in the softirq ! The calling site * takes care of this. */ + debug_hrtimer_deactivate(timer); return 1; case HRTIMER_CB_IRQSAFE: case HRTIMER_CB_SOFTIRQ: @@ -735,6 +838,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, struct hrtimer *entry; int leftmost = 1; + debug_hrtimer_activate(timer); + /* * Find the right place in the rbtree: */ @@ -831,6 +936,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) * reprogramming happens in the interrupt handler. This is a * rare case and less expensive than a smp call. */ + debug_hrtimer_deactivate(timer); timer_stats_hrtimer_clear_start_info(timer); reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, @@ -878,6 +984,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) tim = ktime_add_safe(tim, base->resolution); #endif } + timer->expires = tim; timer_stats_hrtimer_set_start_info(timer); @@ -1011,14 +1118,8 @@ ktime_t hrtimer_get_next_event(void) } #endif -/** - * hrtimer_init - initialize a timer to the given clock - * @timer: the timer to be initialized - * @clock_id: the clock to be used - * @mode: timer mode abs/rel - */ -void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, - enum hrtimer_mode mode) +static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) { struct hrtimer_cpu_base *cpu_base; @@ -1039,6 +1140,19 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, memset(timer->start_comm, 0, TASK_COMM_LEN); #endif } + +/** + * hrtimer_init - initialize a timer to the given clock + * @timer: the timer to be initialized + * @clock_id: the clock to be used + * @mode: timer mode abs/rel + */ +void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) +{ + debug_hrtimer_init(timer); + __hrtimer_init(timer, clock_id, mode); +} EXPORT_SYMBOL_GPL(hrtimer_init); /** @@ -1072,6 +1186,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) timer = list_entry(cpu_base->cb_pending.next, struct hrtimer, cb_entry); + debug_hrtimer_deactivate(timer); timer_stats_account_hrtimer(timer); fn = timer->function; @@ -1120,6 +1235,7 @@ static void __run_hrtimer(struct hrtimer *timer) enum hrtimer_restart (*fn)(struct hrtimer *); int restart; + debug_hrtimer_deactivate(timer); __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); timer_stats_account_hrtimer(timer); @@ -1378,22 +1494,27 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) { struct hrtimer_sleeper t; struct timespec __user *rmtp; + int ret = 0; - hrtimer_init(&t.timer, restart->nanosleep.index, HRTIMER_MODE_ABS); + hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, + HRTIMER_MODE_ABS); t.timer.expires.tv64 = restart->nanosleep.expires; if (do_nanosleep(&t, HRTIMER_MODE_ABS)) - return 0; + goto out; rmtp = restart->nanosleep.rmtp; if (rmtp) { - int ret = update_rmtp(&t.timer, rmtp); + ret = update_rmtp(&t.timer, rmtp); if (ret <= 0) - return ret; + goto out; } /* The other values in restart are already filled in */ - return -ERESTART_RESTARTBLOCK; + ret = -ERESTART_RESTARTBLOCK; +out: + destroy_hrtimer_on_stack(&t.timer); + return ret; } long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, @@ -1401,20 +1522,23 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, { struct restart_block *restart; struct hrtimer_sleeper t; + int ret = 0; - hrtimer_init(&t.timer, clockid, mode); + hrtimer_init_on_stack(&t.timer, clockid, mode); t.timer.expires = timespec_to_ktime(*rqtp); if (do_nanosleep(&t, mode)) - return 0; + goto out; /* Absolute timers do not update the rmtp value and restart: */ - if (mode == HRTIMER_MODE_ABS) - return -ERESTARTNOHAND; + if (mode == HRTIMER_MODE_ABS) { + ret = -ERESTARTNOHAND; + goto out; + } if (rmtp) { - int ret = update_rmtp(&t.timer, rmtp); + ret = update_rmtp(&t.timer, rmtp); if (ret <= 0) - return ret; + goto out; } restart = ¤t_thread_info()->restart_block; @@ -1423,7 +1547,10 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, restart->nanosleep.rmtp = rmtp; restart->nanosleep.expires = t.timer.expires.tv64; - return -ERESTART_RESTARTBLOCK; + ret = -ERESTART_RESTARTBLOCK; +out: + destroy_hrtimer_on_stack(&t.timer); + return ret; } asmlinkage long @@ -1468,6 +1595,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, while ((node = rb_first(&old_base->active))) { timer = rb_entry(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); + debug_hrtimer_deactivate(timer); __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0); timer->base = new_base; /* diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 46e4ad1723f..46d6611a33b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -150,6 +150,26 @@ void disable_irq(unsigned int irq) } EXPORT_SYMBOL(disable_irq); +static void __enable_irq(struct irq_desc *desc, unsigned int irq) +{ + switch (desc->depth) { + case 0: + printk(KERN_WARNING "Unbalanced enable for IRQ %d\n", irq); + WARN_ON(1); + break; + case 1: { + unsigned int status = desc->status & ~IRQ_DISABLED; + + /* Prevent probing on this irq: */ + desc->status = status | IRQ_NOPROBE; + check_irq_resend(desc, irq); + /* fall-through */ + } + default: + desc->depth--; + } +} + /** * enable_irq - enable handling of an irq * @irq: Interrupt to enable @@ -169,22 +189,7 @@ void enable_irq(unsigned int irq) return; spin_lock_irqsave(&desc->lock, flags); - switch (desc->depth) { - case 0: - printk(KERN_WARNING "Unbalanced enable for IRQ %d\n", irq); - WARN_ON(1); - break; - case 1: { - unsigned int status = desc->status & ~IRQ_DISABLED; - - /* Prevent probing on this irq: */ - desc->status = status | IRQ_NOPROBE; - check_irq_resend(desc, irq); - /* fall-through */ - } - default: - desc->depth--; - } + __enable_irq(desc, irq); spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(enable_irq); @@ -365,7 +370,7 @@ int setup_irq(unsigned int irq, struct irqaction *new) compat_irq_chip_set_default_handler(desc); desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING | - IRQ_INPROGRESS); + IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED); if (!(desc->status & IRQ_NOAUTOEN)) { desc->depth = 0; @@ -381,6 +386,16 @@ int setup_irq(unsigned int irq, struct irqaction *new) /* Reset broken irq detection when installing new handler */ desc->irq_count = 0; desc->irqs_unhandled = 0; + + /* + * Check whether we disabled the irq via the spurious handler + * before. Reenable it and give it another chance. + */ + if (shared && (desc->status & IRQ_SPURIOUS_DISABLED)) { + desc->status &= ~IRQ_SPURIOUS_DISABLED; + __enable_irq(desc, irq); + } + spin_unlock_irqrestore(&desc->lock, flags); new->irq = irq; diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 088dabbf2d6..c66d3f10e85 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -209,8 +209,8 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, * Now kill the IRQ */ printk(KERN_EMERG "Disabling IRQ #%d\n", irq); - desc->status |= IRQ_DISABLED; - desc->depth = 1; + desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED; + desc->depth++; desc->chip->disable(irq); } desc->irqs_unhandled = 0; diff --git a/kernel/kexec.c b/kernel/kexec.c index cb85c79989b..1c5fcacbcf3 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1217,7 +1217,7 @@ static int __init parse_crashkernel_mem(char *cmdline, } /* match ? */ - if (system_ram >= start && system_ram <= end) { + if (system_ram >= start && system_ram < end) { *crash_size = size; break; } diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 1bd0ec1c80b..39e31a036f5 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -61,7 +61,7 @@ struct kgdb_state { int err_code; int cpu; int pass_exception; - long threadid; + unsigned long threadid; long kgdb_usethreadid; struct pt_regs *linux_regs; }; @@ -146,7 +146,7 @@ atomic_t kgdb_cpu_doing_single_step = ATOMIC_INIT(-1); * the other CPUs might interfere with your debugging context, so * use this with care: */ -int kgdb_do_roundup = 1; +static int kgdb_do_roundup = 1; static int __init opt_nokgdbroundup(char *str) { @@ -438,7 +438,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count) * While we find nice hex chars, build a long_val. * Return number of chars processed. */ -int kgdb_hex2long(char **ptr, long *long_val) +int kgdb_hex2long(char **ptr, unsigned long *long_val) { int hex_val; int num = 0; @@ -709,7 +709,7 @@ int kgdb_isremovedbreak(unsigned long addr) return 0; } -int remove_all_break(void) +static int remove_all_break(void) { unsigned long addr; int error; diff --git a/kernel/kmod.c b/kernel/kmod.c index e2764047ec0..8df97d3dfda 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -27,6 +27,7 @@ #include <linux/mnt_namespace.h> #include <linux/completion.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/workqueue.h> #include <linux/security.h> #include <linux/mount.h> diff --git a/kernel/kthread.c b/kernel/kthread.c index ac72eea4833..bd1b9ea024e 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -98,7 +98,7 @@ static void create_kthread(struct kthread_create_info *create) struct sched_param param = { .sched_priority = 0 }; wait_for_completion(&create->started); read_lock(&tasklist_lock); - create->result = find_task_by_pid(pid); + create->result = find_task_by_pid_ns(pid, &init_pid_ns); read_unlock(&tasklist_lock); /* * root may have changed our (kthreadd's) priority or CPU mask. diff --git a/kernel/marker.c b/kernel/marker.c index 139260e5460..b5a9fe1d50d 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -29,7 +29,7 @@ extern struct marker __start___markers[]; extern struct marker __stop___markers[]; /* Set to 1 to enable marker debug output */ -const int marker_debug; +static const int marker_debug; /* * markers_mutex nests inside module_mutex. Markers mutex protects the builtin diff --git a/kernel/module.c b/kernel/module.c index 8d6cccc6c3c..8e4528c9909 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -164,131 +164,140 @@ static const struct kernel_symbol *lookup_symbol(const char *name, return NULL; } -static void printk_unused_warning(const char *name) +static bool always_ok(bool gplok, bool warn, const char *name) { - printk(KERN_WARNING "Symbol %s is marked as UNUSED, " - "however this module is using it.\n", name); - printk(KERN_WARNING "This symbol will go away in the future.\n"); - printk(KERN_WARNING "Please evalute if this is the right api to use, " - "and if it really is, submit a report the linux kernel " - "mailinglist together with submitting your code for " - "inclusion.\n"); + return true; } -/* Find a symbol, return value, crc and module which owns it */ -static unsigned long __find_symbol(const char *name, - struct module **owner, - const unsigned long **crc, - int gplok) +static bool printk_unused_warning(bool gplok, bool warn, const char *name) { - struct module *mod; - const struct kernel_symbol *ks; - - /* Core kernel first. */ - *owner = NULL; - ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); - if (ks) { - *crc = symversion(__start___kcrctab, (ks - __start___ksymtab)); - return ks->value; - } - if (gplok) { - ks = lookup_symbol(name, __start___ksymtab_gpl, - __stop___ksymtab_gpl); - if (ks) { - *crc = symversion(__start___kcrctab_gpl, - (ks - __start___ksymtab_gpl)); - return ks->value; - } + if (warn) { + printk(KERN_WARNING "Symbol %s is marked as UNUSED, " + "however this module is using it.\n", name); + printk(KERN_WARNING + "This symbol will go away in the future.\n"); + printk(KERN_WARNING + "Please evalute if this is the right api to use and if " + "it really is, submit a report the linux kernel " + "mailinglist together with submitting your code for " + "inclusion.\n"); } - ks = lookup_symbol(name, __start___ksymtab_gpl_future, - __stop___ksymtab_gpl_future); - if (ks) { - if (!gplok) { - printk(KERN_WARNING "Symbol %s is being used " - "by a non-GPL module, which will not " - "be allowed in the future\n", name); - printk(KERN_WARNING "Please see the file " - "Documentation/feature-removal-schedule.txt " - "in the kernel source tree for more " - "details.\n"); - } - *crc = symversion(__start___kcrctab_gpl_future, - (ks - __start___ksymtab_gpl_future)); - return ks->value; + return true; +} + +static bool gpl_only_unused_warning(bool gplok, bool warn, const char *name) +{ + if (!gplok) + return false; + return printk_unused_warning(gplok, warn, name); +} + +static bool gpl_only(bool gplok, bool warn, const char *name) +{ + return gplok; +} + +static bool warn_if_not_gpl(bool gplok, bool warn, const char *name) +{ + if (!gplok && warn) { + printk(KERN_WARNING "Symbol %s is being used " + "by a non-GPL module, which will not " + "be allowed in the future\n", name); + printk(KERN_WARNING "Please see the file " + "Documentation/feature-removal-schedule.txt " + "in the kernel source tree for more details.\n"); } + return true; +} - ks = lookup_symbol(name, __start___ksymtab_unused, - __stop___ksymtab_unused); - if (ks) { - printk_unused_warning(name); - *crc = symversion(__start___kcrctab_unused, - (ks - __start___ksymtab_unused)); - return ks->value; +struct symsearch { + const struct kernel_symbol *start, *stop; + const unsigned long *crcs; + bool (*check)(bool gplok, bool warn, const char *name); +}; + +/* Look through this array of symbol tables for a symbol match which + * passes the check function. */ +static const struct kernel_symbol *search_symarrays(const struct symsearch *arr, + unsigned int num, + const char *name, + bool gplok, + bool warn, + const unsigned long **crc) +{ + unsigned int i; + const struct kernel_symbol *ks; + + for (i = 0; i < num; i++) { + ks = lookup_symbol(name, arr[i].start, arr[i].stop); + if (!ks || !arr[i].check(gplok, warn, name)) + continue; + + if (crc) + *crc = symversion(arr[i].crcs, ks - arr[i].start); + return ks; } + return NULL; +} - if (gplok) - ks = lookup_symbol(name, __start___ksymtab_unused_gpl, - __stop___ksymtab_unused_gpl); +/* Find a symbol, return value, (optional) crc and (optional) module + * which owns it */ +static unsigned long find_symbol(const char *name, + struct module **owner, + const unsigned long **crc, + bool gplok, + bool warn) +{ + struct module *mod; + const struct kernel_symbol *ks; + const struct symsearch arr[] = { + { __start___ksymtab, __stop___ksymtab, __start___kcrctab, + always_ok }, + { __start___ksymtab_gpl, __stop___ksymtab_gpl, + __start___kcrctab_gpl, gpl_only }, + { __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future, + __start___kcrctab_gpl_future, warn_if_not_gpl }, + { __start___ksymtab_unused, __stop___ksymtab_unused, + __start___kcrctab_unused, printk_unused_warning }, + { __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl, + __start___kcrctab_unused_gpl, gpl_only_unused_warning }, + }; + + /* Core kernel first. */ + ks = search_symarrays(arr, ARRAY_SIZE(arr), name, gplok, warn, crc); if (ks) { - printk_unused_warning(name); - *crc = symversion(__start___kcrctab_unused_gpl, - (ks - __start___ksymtab_unused_gpl)); + if (owner) + *owner = NULL; return ks->value; } /* Now try modules. */ list_for_each_entry(mod, &modules, list) { - *owner = mod; - ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); + struct symsearch arr[] = { + { mod->syms, mod->syms + mod->num_syms, mod->crcs, + always_ok }, + { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms, + mod->gpl_crcs, gpl_only }, + { mod->gpl_future_syms, + mod->gpl_future_syms + mod->num_gpl_future_syms, + mod->gpl_future_crcs, warn_if_not_gpl }, + { mod->unused_syms, + mod->unused_syms + mod->num_unused_syms, + mod->unused_crcs, printk_unused_warning }, + { mod->unused_gpl_syms, + mod->unused_gpl_syms + mod->num_unused_gpl_syms, + mod->unused_gpl_crcs, gpl_only_unused_warning }, + }; + + ks = search_symarrays(arr, ARRAY_SIZE(arr), + name, gplok, warn, crc); if (ks) { - *crc = symversion(mod->crcs, (ks - mod->syms)); - return ks->value; - } - - if (gplok) { - ks = lookup_symbol(name, mod->gpl_syms, - mod->gpl_syms + mod->num_gpl_syms); - if (ks) { - *crc = symversion(mod->gpl_crcs, - (ks - mod->gpl_syms)); - return ks->value; - } - } - ks = lookup_symbol(name, mod->unused_syms, mod->unused_syms + mod->num_unused_syms); - if (ks) { - printk_unused_warning(name); - *crc = symversion(mod->unused_crcs, (ks - mod->unused_syms)); - return ks->value; - } - - if (gplok) { - ks = lookup_symbol(name, mod->unused_gpl_syms, - mod->unused_gpl_syms + mod->num_unused_gpl_syms); - if (ks) { - printk_unused_warning(name); - *crc = symversion(mod->unused_gpl_crcs, - (ks - mod->unused_gpl_syms)); - return ks->value; - } - } - ks = lookup_symbol(name, mod->gpl_future_syms, - (mod->gpl_future_syms + - mod->num_gpl_future_syms)); - if (ks) { - if (!gplok) { - printk(KERN_WARNING "Symbol %s is being used " - "by a non-GPL module, which will not " - "be allowed in the future\n", name); - printk(KERN_WARNING "Please see the file " - "Documentation/feature-removal-schedule.txt " - "in the kernel source tree for more " - "details.\n"); - } - *crc = symversion(mod->gpl_future_crcs, - (ks - mod->gpl_future_syms)); + if (owner) + *owner = mod; return ks->value; } } + DEBUGP("Failed to find symbol %s\n", name); return -ENOENT; } @@ -736,12 +745,13 @@ sys_delete_module(const char __user *name_user, unsigned int flags) if (!forced && module_refcount(mod) != 0) wait_for_zero_refcount(mod); + mutex_unlock(&module_mutex); /* Final destruction now noone is using it. */ - if (mod->exit != NULL) { - mutex_unlock(&module_mutex); + if (mod->exit != NULL) mod->exit(); - mutex_lock(&module_mutex); - } + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_GOING, mod); + mutex_lock(&module_mutex); /* Store the name of the last unloaded module for diagnostic purposes */ strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); free_module(mod); @@ -777,10 +787,9 @@ static void print_unload_info(struct seq_file *m, struct module *mod) void __symbol_put(const char *symbol) { struct module *owner; - const unsigned long *crc; preempt_disable(); - if (IS_ERR_VALUE(__find_symbol(symbol, &owner, &crc, 1))) + if (IS_ERR_VALUE(find_symbol(symbol, &owner, NULL, true, false))) BUG(); module_put(owner); preempt_enable(); @@ -881,6 +890,19 @@ static struct module_attribute *modinfo_attrs[] = { static const char vermagic[] = VERMAGIC_STRING; +static int try_to_force_load(struct module *mod, const char *symname) +{ +#ifdef CONFIG_MODULE_FORCE_LOAD + if (!(tainted & TAINT_FORCED_MODULE)) + printk("%s: no version for \"%s\" found: kernel tainted.\n", + mod->name, symname); + add_taint_module(mod, TAINT_FORCED_MODULE); + return 0; +#else + return -ENOEXEC; +#endif +} + #ifdef CONFIG_MODVERSIONS static int check_version(Elf_Shdr *sechdrs, unsigned int versindex, @@ -905,18 +927,18 @@ static int check_version(Elf_Shdr *sechdrs, if (versions[i].crc == *crc) return 1; - printk("%s: disagrees about version of symbol %s\n", - mod->name, symname); DEBUGP("Found checksum %lX vs module %lX\n", *crc, versions[i].crc); - return 0; + goto bad_version; } - /* Not in module's version table. OK, but that taints the kernel. */ - if (!(tainted & TAINT_FORCED_MODULE)) - printk("%s: no version for \"%s\" found: kernel tainted.\n", - mod->name, symname); - add_taint_module(mod, TAINT_FORCED_MODULE); - return 1; + + if (!try_to_force_load(mod, symname)) + return 1; + +bad_version: + printk("%s: disagrees about version of symbol %s\n", + mod->name, symname); + return 0; } static inline int check_modstruct_version(Elf_Shdr *sechdrs, @@ -924,13 +946,10 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, struct module *mod) { const unsigned long *crc; - struct module *owner; - if (IS_ERR_VALUE(__find_symbol("struct_module", - &owner, &crc, 1))) + if (IS_ERR_VALUE(find_symbol("struct_module", NULL, &crc, true, false))) BUG(); - return check_version(sechdrs, versindex, "struct_module", mod, - crc); + return check_version(sechdrs, versindex, "struct_module", mod, crc); } /* First part is kernel version, which we ignore. */ @@ -974,8 +993,8 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, unsigned long ret; const unsigned long *crc; - ret = __find_symbol(name, &owner, &crc, - !(mod->taints & TAINT_PROPRIETARY_MODULE)); + ret = find_symbol(name, &owner, &crc, + !(mod->taints & TAINT_PROPRIETARY_MODULE), true); if (!IS_ERR_VALUE(ret)) { /* use_module can fail due to OOM, or module initialization or unloading */ @@ -991,6 +1010,20 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, * J. Corbet <corbet@lwn.net> */ #if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) +struct module_sect_attr +{ + struct module_attribute mattr; + char *name; + unsigned long address; +}; + +struct module_sect_attrs +{ + struct attribute_group grp; + unsigned int nsections; + struct module_sect_attr attrs[0]; +}; + static ssize_t module_sect_show(struct module_attribute *mattr, struct module *mod, char *buf) { @@ -1001,7 +1034,7 @@ static ssize_t module_sect_show(struct module_attribute *mattr, static void free_sect_attrs(struct module_sect_attrs *sect_attrs) { - int section; + unsigned int section; for (section = 0; section < sect_attrs->nsections; section++) kfree(sect_attrs->attrs[section].name); @@ -1362,10 +1395,9 @@ void *__symbol_get(const char *symbol) { struct module *owner; unsigned long value; - const unsigned long *crc; preempt_disable(); - value = __find_symbol(symbol, &owner, &crc, 1); + value = find_symbol(symbol, &owner, NULL, true, true); if (IS_ERR_VALUE(value)) value = 0; else if (strong_try_module_get(owner)) @@ -1382,33 +1414,33 @@ EXPORT_SYMBOL_GPL(__symbol_get); */ static int verify_export_symbols(struct module *mod) { - const char *name = NULL; - unsigned long i, ret = 0; + unsigned int i; struct module *owner; - const unsigned long *crc; - - for (i = 0; i < mod->num_syms; i++) - if (!IS_ERR_VALUE(__find_symbol(mod->syms[i].name, - &owner, &crc, 1))) { - name = mod->syms[i].name; - ret = -ENOEXEC; - goto dup; - } + const struct kernel_symbol *s; + struct { + const struct kernel_symbol *sym; + unsigned int num; + } arr[] = { + { mod->syms, mod->num_syms }, + { mod->gpl_syms, mod->num_gpl_syms }, + { mod->gpl_future_syms, mod->num_gpl_future_syms }, + { mod->unused_syms, mod->num_unused_syms }, + { mod->unused_gpl_syms, mod->num_unused_gpl_syms }, + }; - for (i = 0; i < mod->num_gpl_syms; i++) - if (!IS_ERR_VALUE(__find_symbol(mod->gpl_syms[i].name, - &owner, &crc, 1))) { - name = mod->gpl_syms[i].name; - ret = -ENOEXEC; - goto dup; + for (i = 0; i < ARRAY_SIZE(arr); i++) { + for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { + if (!IS_ERR_VALUE(find_symbol(s->name, &owner, + NULL, true, false))) { + printk(KERN_ERR + "%s: exports duplicate symbol %s" + " (owned by %s)\n", + mod->name, s->name, module_name(owner)); + return -ENOEXEC; + } } - -dup: - if (ret) - printk(KERN_ERR "%s: exports duplicate symbol %s (owned by %s)\n", - mod->name, name, module_name(owner)); - - return ret; + } + return 0; } /* Change all symbols so that st_value encodes the pointer directly. */ @@ -1814,8 +1846,9 @@ static struct module *load_module(void __user *umod, unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); #endif - /* Don't keep modinfo section */ + /* Don't keep modinfo and version sections. */ sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; + sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; #ifdef CONFIG_KALLSYMS /* Keep symbol and string tables for decoding later. */ sechdrs[symindex].sh_flags |= SHF_ALLOC; @@ -1833,9 +1866,9 @@ static struct module *load_module(void __user *umod, modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); /* This is allowed: modprobe --force will invalidate it. */ if (!modmagic) { - add_taint_module(mod, TAINT_FORCED_MODULE); - printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", - mod->name); + err = try_to_force_load(mod, "magic"); + if (err) + goto free_hdr; } else if (!same_magic(modmagic, vermagic)) { printk(KERN_ERR "%s: version magic '%s' should be '%s'\n", mod->name, modmagic, vermagic); @@ -1977,7 +2010,8 @@ static struct module *load_module(void __user *umod, mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr; mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr; if (unusedgplcrcindex) - mod->unused_crcs = (void *)sechdrs[unusedgplcrcindex].sh_addr; + mod->unused_gpl_crcs + = (void *)sechdrs[unusedgplcrcindex].sh_addr; #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !crcindex) || @@ -1985,9 +2019,10 @@ static struct module *load_module(void __user *umod, (mod->num_gpl_future_syms && !gplfuturecrcindex) || (mod->num_unused_syms && !unusedcrcindex) || (mod->num_unused_gpl_syms && !unusedgplcrcindex)) { - printk(KERN_WARNING "%s: No versions for exported symbols." - " Tainting kernel.\n", mod->name); - add_taint_module(mod, TAINT_FORCED_MODULE); + printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name); + err = try_to_force_load(mod, "nocrc"); + if (err) + goto cleanup; } #endif markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); @@ -2171,6 +2206,8 @@ sys_init_module(void __user *umod, mod->state = MODULE_STATE_GOING; synchronize_sched(); module_put(mod); + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_GOING, mod); mutex_lock(&module_mutex); free_module(mod); mutex_unlock(&module_mutex); diff --git a/kernel/pid.c b/kernel/pid.c index 477691576b3..20d59fa2d49 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -111,10 +111,11 @@ EXPORT_SYMBOL(is_container_init); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); -static void free_pidmap(struct pid_namespace *pid_ns, int pid) +static void free_pidmap(struct upid *upid) { - struct pidmap *map = pid_ns->pidmap + pid / BITS_PER_PAGE; - int offset = pid & BITS_PER_PAGE_MASK; + int nr = upid->nr; + struct pidmap *map = upid->ns->pidmap + nr / BITS_PER_PAGE; + int offset = nr & BITS_PER_PAGE_MASK; clear_bit(offset, map->page); atomic_inc(&map->nr_free); @@ -232,7 +233,7 @@ void free_pid(struct pid *pid) spin_unlock_irqrestore(&pidmap_lock, flags); for (i = 0; i <= pid->level; i++) - free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr); + free_pidmap(pid->numbers + i); call_rcu(&pid->rcu, delayed_put_pid); } @@ -278,8 +279,8 @@ out: return pid; out_free: - for (i++; i <= ns->level; i++) - free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr); + while (++i <= ns->level) + free_pidmap(pid->numbers + i); kmem_cache_free(ns->pid_cachep, pid); pid = NULL; @@ -316,7 +317,7 @@ EXPORT_SYMBOL_GPL(find_pid); /* * attach_pid() must be called with the tasklist_lock write-held. */ -int attach_pid(struct task_struct *task, enum pid_type type, +void attach_pid(struct task_struct *task, enum pid_type type, struct pid *pid) { struct pid_link *link; @@ -324,11 +325,10 @@ int attach_pid(struct task_struct *task, enum pid_type type, link = &task->pids[type]; link->pid = pid; hlist_add_head_rcu(&link->node, &pid->tasks[type]); - - return 0; } -void detach_pid(struct task_struct *task, enum pid_type type) +static void __change_pid(struct task_struct *task, enum pid_type type, + struct pid *new) { struct pid_link *link; struct pid *pid; @@ -338,7 +338,7 @@ void detach_pid(struct task_struct *task, enum pid_type type) pid = link->pid; hlist_del_rcu(&link->node); - link->pid = NULL; + link->pid = new; for (tmp = PIDTYPE_MAX; --tmp >= 0; ) if (!hlist_empty(&pid->tasks[tmp])) @@ -347,13 +347,24 @@ void detach_pid(struct task_struct *task, enum pid_type type) free_pid(pid); } +void detach_pid(struct task_struct *task, enum pid_type type) +{ + __change_pid(task, type, NULL); +} + +void change_pid(struct task_struct *task, enum pid_type type, + struct pid *pid) +{ + __change_pid(task, type, pid); + attach_pid(task, type, pid); +} + /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type type) { new->pids[type].pid = old->pids[type].pid; hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node); - old->pids[type].pid = NULL; } struct task_struct *pid_task(struct pid *pid, enum pid_type type) @@ -380,12 +391,6 @@ struct task_struct *find_task_by_pid_type_ns(int type, int nr, EXPORT_SYMBOL(find_task_by_pid_type_ns); -struct task_struct *find_task_by_pid(pid_t nr) -{ - return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns); -} -EXPORT_SYMBOL(find_task_by_pid); - struct task_struct *find_task_by_vpid(pid_t vnr) { return find_task_by_pid_type_ns(PIDTYPE_PID, vnr, diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 5ca37fa50be..98702b4b885 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -66,7 +66,7 @@ err_alloc: return NULL; } -static struct pid_namespace *create_pid_namespace(int level) +static struct pid_namespace *create_pid_namespace(unsigned int level) { struct pid_namespace *ns; int i; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index ae5c6c147c4..f1525ad06cb 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -4,8 +4,9 @@ #include <linux/sched.h> #include <linux/posix-timers.h> -#include <asm/uaccess.h> #include <linux/errno.h> +#include <linux/math64.h> +#include <asm/uaccess.h> static int check_clock(const clockid_t which_clock) { @@ -47,12 +48,10 @@ static void sample_to_timespec(const clockid_t which_clock, union cpu_time_count cpu, struct timespec *tp) { - if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - tp->tv_sec = div_long_long_rem(cpu.sched, - NSEC_PER_SEC, &tp->tv_nsec); - } else { + if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) + *tp = ns_to_timespec(cpu.sched); + else cputime_to_timespec(cpu.cpu, tp); - } } static inline int cpu_time_before(const clockid_t which_clock, diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 8476956ffd9..dbd8398ddb0 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -310,8 +310,7 @@ int posix_timer_event(struct k_itimer *timr,int si_private) if (timr->it_sigev_notify & SIGEV_THREAD_ID) { struct task_struct *leader; - int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq, - timr->it_process); + int ret = send_sigqueue(timr->sigq, timr->it_process, 0); if (likely(ret >= 0)) return ret; @@ -322,8 +321,7 @@ int posix_timer_event(struct k_itimer *timr,int si_private) timr->it_process = leader; } - return send_group_sigqueue(timr->it_sigev_signo, timr->sigq, - timr->it_process); + return send_sigqueue(timr->sigq, timr->it_process, 1); } EXPORT_SYMBOL_GPL(posix_timer_event); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 6233f3b4ae6..b45da40e8d2 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -19,16 +19,6 @@ config PM will issue the hlt instruction if nothing is to be done, thereby sending the processor to sleep and saving power. -config PM_LEGACY - bool "Legacy Power Management API (DEPRECATED)" - depends on PM - default n - ---help--- - Support for pm_register() and friends. This old API is obsoleted - by the driver model. - - If unsure, say N. - config PM_DEBUG bool "Power Management Debug Support" depends on PM diff --git a/kernel/power/Makefile b/kernel/power/Makefile index f7dfff28ecd..597823b5b70 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -4,7 +4,6 @@ EXTRA_CFLAGS += -DDEBUG endif obj-y := main.o -obj-$(CONFIG_PM_LEGACY) += pm.o obj-$(CONFIG_PM_SLEEP) += process.o console.o obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o diff --git a/kernel/power/pm.c b/kernel/power/pm.c deleted file mode 100644 index 60c73fa670d..00000000000 --- a/kernel/power/pm.c +++ /dev/null @@ -1,205 +0,0 @@ -/* - * pm.c - Power management interface - * - * Copyright (C) 2000 Andrew Henroid - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/mm.h> -#include <linux/slab.h> -#include <linux/pm.h> -#include <linux/pm_legacy.h> -#include <linux/interrupt.h> -#include <linux/mutex.h> - -/* - * Locking notes: - * pm_devs_lock can be a semaphore providing pm ops are not called - * from an interrupt handler (already a bad idea so no change here). Each - * change must be protected so that an unlink of an entry doesn't clash - * with a pm send - which is permitted to sleep in the current architecture - * - * Module unloads clashing with pm events now work out safely, the module - * unload path will block until the event has been sent. It may well block - * until a resume but that will be fine. - */ - -static DEFINE_MUTEX(pm_devs_lock); -static LIST_HEAD(pm_devs); - -/** - * pm_register - register a device with power management - * @type: device type - * @id: device ID - * @callback: callback function - * - * Add a device to the list of devices that wish to be notified about - * power management events. A &pm_dev structure is returned on success, - * on failure the return is %NULL. - * - * The callback function will be called in process context and - * it may sleep. - */ - -struct pm_dev *pm_register(pm_dev_t type, - unsigned long id, - pm_callback callback) -{ - struct pm_dev *dev = kzalloc(sizeof(struct pm_dev), GFP_KERNEL); - if (dev) { - dev->type = type; - dev->id = id; - dev->callback = callback; - - mutex_lock(&pm_devs_lock); - list_add(&dev->entry, &pm_devs); - mutex_unlock(&pm_devs_lock); - } - return dev; -} - -/** - * pm_send - send request to a single device - * @dev: device to send to - * @rqst: power management request - * @data: data for the callback - * - * Issue a power management request to a given device. The - * %PM_SUSPEND and %PM_RESUME events are handled specially. The - * data field must hold the intended next state. No call is made - * if the state matches. - * - * BUGS: what stops two power management requests occurring in parallel - * and conflicting. - * - * WARNING: Calling pm_send directly is not generally recommended, in - * particular there is no locking against the pm_dev going away. The - * caller must maintain all needed locking or have 'inside knowledge' - * on the safety. Also remember that this function is not locked against - * pm_unregister. This means that you must handle SMP races on callback - * execution and unload yourself. - */ - -static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data) -{ - int status = 0; - unsigned long prev_state, next_state; - - if (in_interrupt()) - BUG(); - - switch (rqst) { - case PM_SUSPEND: - case PM_RESUME: - prev_state = dev->state; - next_state = (unsigned long) data; - if (prev_state != next_state) { - if (dev->callback) - status = (*dev->callback)(dev, rqst, data); - if (!status) { - dev->state = next_state; - dev->prev_state = prev_state; - } - } - else { - dev->prev_state = prev_state; - } - break; - default: - if (dev->callback) - status = (*dev->callback)(dev, rqst, data); - break; - } - return status; -} - -/* - * Undo incomplete request - */ -static void pm_undo_all(struct pm_dev *last) -{ - struct list_head *entry = last->entry.prev; - while (entry != &pm_devs) { - struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); - if (dev->state != dev->prev_state) { - /* previous state was zero (running) resume or - * previous state was non-zero (suspended) suspend - */ - pm_request_t undo = (dev->prev_state - ? PM_SUSPEND:PM_RESUME); - pm_send(dev, undo, (void*) dev->prev_state); - } - entry = entry->prev; - } -} - -/** - * pm_send_all - send request to all managed devices - * @rqst: power management request - * @data: data for the callback - * - * Issue a power management request to a all devices. The - * %PM_SUSPEND events are handled specially. Any device is - * permitted to fail a suspend by returning a non zero (error) - * value from its callback function. If any device vetoes a - * suspend request then all other devices that have suspended - * during the processing of this request are restored to their - * previous state. - * - * WARNING: This function takes the pm_devs_lock. The lock is not dropped until - * the callbacks have completed. This prevents races against pm locking - * functions, races against module unload pm_unregister code. It does - * mean however that you must not issue pm_ functions within the callback - * or you will deadlock and users will hate you. - * - * Zero is returned on success. If a suspend fails then the status - * from the device that vetoes the suspend is returned. - * - * BUGS: what stops two power management requests occurring in parallel - * and conflicting. - */ - -int pm_send_all(pm_request_t rqst, void *data) -{ - struct list_head *entry; - - mutex_lock(&pm_devs_lock); - entry = pm_devs.next; - while (entry != &pm_devs) { - struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); - if (dev->callback) { - int status = pm_send(dev, rqst, data); - if (status) { - /* return devices to previous state on - * failed suspend request - */ - if (rqst == PM_SUSPEND) - pm_undo_all(dev); - mutex_unlock(&pm_devs_lock); - return status; - } - } - entry = entry->next; - } - mutex_unlock(&pm_devs_lock); - return 0; -} - -EXPORT_SYMBOL(pm_register); -EXPORT_SYMBOL(pm_send_all); - diff --git a/kernel/printk.c b/kernel/printk.c index d3f9c0f788b..8fb01c32aa3 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -111,6 +111,9 @@ struct console_cmdline char name[8]; /* Name of the driver */ int index; /* Minor dev. to use */ char *options; /* Options for the driver */ +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + char *brl_options; /* Options for braille driver */ +#endif }; #define MAX_CMDLINECONSOLES 8 @@ -808,15 +811,60 @@ static void call_console_drivers(unsigned start, unsigned end) #endif +static int __add_preferred_console(char *name, int idx, char *options, + char *brl_options) +{ + struct console_cmdline *c; + int i; + + /* + * See if this tty is not yet registered, and + * if we have a slot free. + */ + for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) + if (strcmp(console_cmdline[i].name, name) == 0 && + console_cmdline[i].index == idx) { + if (!brl_options) + selected_console = i; + return 0; + } + if (i == MAX_CMDLINECONSOLES) + return -E2BIG; + if (!brl_options) + selected_console = i; + c = &console_cmdline[i]; + strlcpy(c->name, name, sizeof(c->name)); + c->options = options; +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + c->brl_options = brl_options; +#endif + c->index = idx; + return 0; +} /* * Set up a list of consoles. Called from init/main.c */ static int __init console_setup(char *str) { char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */ - char *s, *options; + char *s, *options, *brl_options = NULL; int idx; +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + if (!memcmp(str, "brl,", 4)) { + brl_options = ""; + str += 4; + } else if (!memcmp(str, "brl=", 4)) { + brl_options = str + 4; + str = strchr(brl_options, ','); + if (!str) { + printk(KERN_ERR "need port name after brl=\n"); + return 1; + } + *(str++) = 0; + } +#endif + /* * Decode str into name, index, options. */ @@ -841,7 +889,7 @@ static int __init console_setup(char *str) idx = simple_strtoul(s, NULL, 10); *s = 0; - add_preferred_console(buf, idx, options); + __add_preferred_console(buf, idx, options, brl_options); return 1; } __setup("console=", console_setup); @@ -861,28 +909,7 @@ __setup("console=", console_setup); */ int add_preferred_console(char *name, int idx, char *options) { - struct console_cmdline *c; - int i; - - /* - * See if this tty is not yet registered, and - * if we have a slot free. - */ - for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) - if (strcmp(console_cmdline[i].name, name) == 0 && - console_cmdline[i].index == idx) { - selected_console = i; - return 0; - } - if (i == MAX_CMDLINECONSOLES) - return -E2BIG; - selected_console = i; - c = &console_cmdline[i]; - memcpy(c->name, name, sizeof(c->name)); - c->name[sizeof(c->name) - 1] = 0; - c->options = options; - c->index = idx; - return 0; + return __add_preferred_console(name, idx, options, NULL); } int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) @@ -894,7 +921,7 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha if (strcmp(console_cmdline[i].name, name) == 0 && console_cmdline[i].index == idx) { c = &console_cmdline[i]; - memcpy(c->name, name_new, sizeof(c->name)); + strlcpy(c->name, name_new, sizeof(c->name)); c->name[sizeof(c->name) - 1] = 0; c->options = options; c->index = idx_new; @@ -1163,6 +1190,16 @@ void register_console(struct console *console) continue; if (console->index < 0) console->index = console_cmdline[i].index; +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + if (console_cmdline[i].brl_options) { + console->flags |= CON_BRL; + braille_register_console(console, + console_cmdline[i].index, + console_cmdline[i].options, + console_cmdline[i].brl_options); + return; + } +#endif if (console->setup && console->setup(console, console_cmdline[i].options) != 0) break; @@ -1221,6 +1258,11 @@ int unregister_console(struct console *console) struct console *a, *b; int res = 1; +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + if (console->flags & CON_BRL) + return braille_unregister_console(console); +#endif + acquire_console_sem(); if (console_drivers == console) { console_drivers=console->next; @@ -1272,8 +1314,8 @@ late_initcall(disable_boot_consoles); */ void tty_write_message(struct tty_struct *tty, char *msg) { - if (tty && tty->driver->write) - tty->driver->write(tty, msg, strlen(msg)); + if (tty && tty->ops->write) + tty->ops->write(tty, msg, strlen(msg)); return; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index dac4b4e5729..6c19e94fd0a 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -73,7 +73,7 @@ void __ptrace_unlink(struct task_struct *child) BUG_ON(!child->ptrace); child->ptrace = 0; - if (!list_empty(&child->ptrace_list)) { + if (ptrace_reparented(child)) { list_del_init(&child->ptrace_list); remove_parent(child); child->parent = child->real_parent; @@ -168,8 +168,6 @@ int ptrace_attach(struct task_struct *task) audit_ptrace(task); retval = -EPERM; - if (task->pid <= 1) - goto out; if (same_thread_group(task, current)) goto out; @@ -208,8 +206,7 @@ repeat: __ptrace_link(task, current); - force_sig_specific(SIGSTOP, task); - + send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); bad: write_unlock_irqrestore(&tasklist_lock, flags); task_unlock(task); @@ -522,12 +519,6 @@ struct task_struct *ptrace_get_task_struct(pid_t pid) { struct task_struct *child; - /* - * Tracing init is not allowed. - */ - if (pid == 1) - return ERR_PTR(-EPERM); - read_lock(&tasklist_lock); child = find_task_by_vpid(pid); if (child) @@ -543,7 +534,6 @@ struct task_struct *ptrace_get_task_struct(pid_t pid) #define arch_ptrace_attach(child) do { } while (0) #endif -#ifndef __ARCH_SYS_PTRACE asmlinkage long sys_ptrace(long request, long pid, long addr, long data) { struct task_struct *child; @@ -591,7 +581,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) unlock_kernel(); return ret; } -#endif /* __ARCH_SYS_PTRACE */ int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) { diff --git a/kernel/sched.c b/kernel/sched.c index e2f7f5acc80..58fb8af1577 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -75,16 +75,6 @@ #include <asm/irq_regs.h> /* - * Scheduler clock - returns current time in nanosec units. - * This is default implementation. - * Architectures and sub-architectures can override this. - */ -unsigned long long __attribute__((weak)) sched_clock(void) -{ - return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); -} - -/* * Convert user-nice values [ -20 ... 0 ... 19 ] * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], * and back. @@ -242,6 +232,12 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b) } #endif +/* + * sched_domains_mutex serializes calls to arch_init_sched_domains, + * detach_destroy_domains and partition_sched_domains. + */ +static DEFINE_MUTEX(sched_domains_mutex); + #ifdef CONFIG_GROUP_SCHED #include <linux/cgroup.h> @@ -308,9 +304,6 @@ static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; */ static DEFINE_SPINLOCK(task_group_lock); -/* doms_cur_mutex serializes access to doms_cur[] array */ -static DEFINE_MUTEX(doms_cur_mutex); - #ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_USER_SCHED # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) @@ -318,7 +311,13 @@ static DEFINE_MUTEX(doms_cur_mutex); # define INIT_TASK_GROUP_LOAD NICE_0_LOAD #endif +/* + * A weight of 0, 1 or ULONG_MAX can cause arithmetics problems. + * (The default weight is 1024 - so there's no practical + * limitation from this.) + */ #define MIN_SHARES 2 +#define MAX_SHARES (ULONG_MAX - 1) static int init_task_group_load = INIT_TASK_GROUP_LOAD; #endif @@ -358,21 +357,9 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) #endif } -static inline void lock_doms_cur(void) -{ - mutex_lock(&doms_cur_mutex); -} - -static inline void unlock_doms_cur(void) -{ - mutex_unlock(&doms_cur_mutex); -} - #else static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } -static inline void lock_doms_cur(void) { } -static inline void unlock_doms_cur(void) { } #endif /* CONFIG_GROUP_SCHED */ @@ -560,13 +547,7 @@ struct rq { unsigned long next_balance; struct mm_struct *prev_mm; - u64 clock, prev_clock_raw; - s64 clock_max_delta; - - unsigned int clock_warps, clock_overflows, clock_underflows; - u64 idle_clock; - unsigned int clock_deep_idle_events; - u64 tick_timestamp; + u64 clock; atomic_t nr_iowait; @@ -631,82 +612,6 @@ static inline int cpu_of(struct rq *rq) #endif } -#ifdef CONFIG_NO_HZ -static inline bool nohz_on(int cpu) -{ - return tick_get_tick_sched(cpu)->nohz_mode != NOHZ_MODE_INACTIVE; -} - -static inline u64 max_skipped_ticks(struct rq *rq) -{ - return nohz_on(cpu_of(rq)) ? jiffies - rq->last_tick_seen + 2 : 1; -} - -static inline void update_last_tick_seen(struct rq *rq) -{ - rq->last_tick_seen = jiffies; -} -#else -static inline u64 max_skipped_ticks(struct rq *rq) -{ - return 1; -} - -static inline void update_last_tick_seen(struct rq *rq) -{ -} -#endif - -/* - * Update the per-runqueue clock, as finegrained as the platform can give - * us, but without assuming monotonicity, etc.: - */ -static void __update_rq_clock(struct rq *rq) -{ - u64 prev_raw = rq->prev_clock_raw; - u64 now = sched_clock(); - s64 delta = now - prev_raw; - u64 clock = rq->clock; - -#ifdef CONFIG_SCHED_DEBUG - WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); -#endif - /* - * Protect against sched_clock() occasionally going backwards: - */ - if (unlikely(delta < 0)) { - clock++; - rq->clock_warps++; - } else { - /* - * Catch too large forward jumps too: - */ - u64 max_jump = max_skipped_ticks(rq) * TICK_NSEC; - u64 max_time = rq->tick_timestamp + max_jump; - - if (unlikely(clock + delta > max_time)) { - if (clock < max_time) - clock = max_time; - else - clock++; - rq->clock_overflows++; - } else { - if (unlikely(delta > rq->clock_max_delta)) - rq->clock_max_delta = delta; - clock += delta; - } - } - - rq->prev_clock_raw = now; - rq->clock = clock; -} - -static void update_rq_clock(struct rq *rq) -{ - if (likely(smp_processor_id() == cpu_of(rq))) - __update_rq_clock(rq); -} - /* * The domain tree (rq->sd) is protected by RCU's quiescent state transition. * See detach_destroy_domains: synchronize_sched for details. @@ -722,6 +627,11 @@ static void update_rq_clock(struct rq *rq) #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) +static inline void update_rq_clock(struct rq *rq) +{ + rq->clock = sched_clock_cpu(cpu_of(rq)); +} + /* * Tunables that become constants when CONFIG_SCHED_DEBUG is off: */ @@ -757,14 +667,14 @@ const_debug unsigned int sysctl_sched_features = #define SCHED_FEAT(name, enabled) \ #name , -__read_mostly char *sched_feat_names[] = { +static __read_mostly char *sched_feat_names[] = { #include "sched_features.h" NULL }; #undef SCHED_FEAT -int sched_feat_open(struct inode *inode, struct file *filp) +static int sched_feat_open(struct inode *inode, struct file *filp) { filp->private_data = inode->i_private; return 0; @@ -899,7 +809,7 @@ static inline u64 global_rt_runtime(void) return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; } -static const unsigned long long time_sync_thresh = 100000; +unsigned long long time_sync_thresh = 100000; static DEFINE_PER_CPU(unsigned long long, time_offset); static DEFINE_PER_CPU(unsigned long long, prev_cpu_time); @@ -913,11 +823,14 @@ static DEFINE_PER_CPU(unsigned long long, prev_cpu_time); static DEFINE_SPINLOCK(time_sync_lock); static unsigned long long prev_global_time; -static unsigned long long __sync_cpu_clock(cycles_t time, int cpu) +static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu) { - unsigned long flags; - - spin_lock_irqsave(&time_sync_lock, flags); + /* + * We want this inlined, to not get tracer function calls + * in this critical section: + */ + spin_acquire(&time_sync_lock.dep_map, 0, 0, _THIS_IP_); + __raw_spin_lock(&time_sync_lock.raw_lock); if (time < prev_global_time) { per_cpu(time_offset, cpu) += prev_global_time - time; @@ -926,7 +839,8 @@ static unsigned long long __sync_cpu_clock(cycles_t time, int cpu) prev_global_time = time; } - spin_unlock_irqrestore(&time_sync_lock, flags); + __raw_spin_unlock(&time_sync_lock.raw_lock); + spin_release(&time_sync_lock.dep_map, 1, _THIS_IP_); return time; } @@ -934,8 +848,6 @@ static unsigned long long __sync_cpu_clock(cycles_t time, int cpu) static unsigned long long __cpu_clock(int cpu) { unsigned long long now; - unsigned long flags; - struct rq *rq; /* * Only call sched_clock() if the scheduler has already been @@ -944,11 +856,7 @@ static unsigned long long __cpu_clock(int cpu) if (unlikely(!scheduler_running)) return 0; - local_irq_save(flags); - rq = cpu_rq(cpu); - update_rq_clock(rq); - now = rq->clock; - local_irq_restore(flags); + now = sched_clock_cpu(cpu); return now; } @@ -960,13 +868,18 @@ static unsigned long long __cpu_clock(int cpu) unsigned long long cpu_clock(int cpu) { unsigned long long prev_cpu_time, time, delta_time; + unsigned long flags; + local_irq_save(flags); prev_cpu_time = per_cpu(prev_cpu_time, cpu); time = __cpu_clock(cpu) + per_cpu(time_offset, cpu); delta_time = time-prev_cpu_time; - if (unlikely(delta_time > time_sync_thresh)) + if (unlikely(delta_time > time_sync_thresh)) { time = __sync_cpu_clock(time, cpu); + per_cpu(prev_cpu_time, cpu) = time; + } + local_irq_restore(flags); return time; } @@ -1117,43 +1030,6 @@ static struct rq *this_rq_lock(void) return rq; } -/* - * We are going deep-idle (irqs are disabled): - */ -void sched_clock_idle_sleep_event(void) -{ - struct rq *rq = cpu_rq(smp_processor_id()); - - spin_lock(&rq->lock); - __update_rq_clock(rq); - spin_unlock(&rq->lock); - rq->clock_deep_idle_events++; -} -EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event); - -/* - * We just idled delta nanoseconds (called with irqs disabled): - */ -void sched_clock_idle_wakeup_event(u64 delta_ns) -{ - struct rq *rq = cpu_rq(smp_processor_id()); - u64 now = sched_clock(); - - rq->idle_clock += delta_ns; - /* - * Override the previous timestamp and ignore all - * sched_clock() deltas that occured while we idled, - * and use the PM-provided delta_ns to advance the - * rq clock: - */ - spin_lock(&rq->lock); - rq->prev_clock_raw = now; - rq->clock += delta_ns; - spin_unlock(&rq->lock); - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); - static void __resched_task(struct task_struct *p, int tif_bit); static inline void resched_task(struct task_struct *p) @@ -1189,6 +1065,7 @@ static inline void resched_rq(struct rq *rq) enum { HRTICK_SET, /* re-programm hrtick_timer */ HRTICK_RESET, /* not a new slice */ + HRTICK_BLOCK, /* stop hrtick operations */ }; /* @@ -1200,6 +1077,8 @@ static inline int hrtick_enabled(struct rq *rq) { if (!sched_feat(HRTICK)) return 0; + if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags))) + return 0; return hrtimer_is_hres_active(&rq->hrtick_timer); } @@ -1275,14 +1154,70 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); spin_lock(&rq->lock); - __update_rq_clock(rq); + update_rq_clock(rq); rq->curr->sched_class->task_tick(rq, rq->curr, 1); spin_unlock(&rq->lock); return HRTIMER_NORESTART; } -static inline void init_rq_hrtick(struct rq *rq) +static void hotplug_hrtick_disable(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + + spin_lock_irqsave(&rq->lock, flags); + rq->hrtick_flags = 0; + __set_bit(HRTICK_BLOCK, &rq->hrtick_flags); + spin_unlock_irqrestore(&rq->lock, flags); + + hrtick_clear(rq); +} + +static void hotplug_hrtick_enable(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + + spin_lock_irqsave(&rq->lock, flags); + __clear_bit(HRTICK_BLOCK, &rq->hrtick_flags); + spin_unlock_irqrestore(&rq->lock, flags); +} + +static int +hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu) +{ + int cpu = (int)(long)hcpu; + + switch (action) { + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + case CPU_DEAD: + case CPU_DEAD_FROZEN: + hotplug_hrtick_disable(cpu); + return NOTIFY_OK; + + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + hotplug_hrtick_enable(cpu); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static void init_hrtick(void) +{ + hotcpu_notifier(hotplug_hrtick, 0); +} + +static void init_rq_hrtick(struct rq *rq) { rq->hrtick_flags = 0; hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -1319,6 +1254,10 @@ static inline void init_rq_hrtick(struct rq *rq) void hrtick_resched(void) { } + +static inline void init_hrtick(void) +{ +} #endif /* @@ -1438,8 +1377,8 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, { u64 tmp; - if (unlikely(!lw->inv_weight)) - lw->inv_weight = (WMULT_CONST-lw->weight/2) / (lw->weight+1); + if (!lw->inv_weight) + lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)/(lw->weight+1); tmp = (u64)delta_exec * weight; /* @@ -1748,6 +1687,8 @@ __update_group_shares_cpu(struct task_group *tg, struct sched_domain *sd, if (shares < MIN_SHARES) shares = MIN_SHARES; + else if (shares > MAX_SHARES) + shares = MAX_SHARES; __set_se_shares(tg->se[tcpu], shares); } @@ -4339,8 +4280,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset, struct rq *rq = this_rq(); cputime64_t tmp; - if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) - return account_guest_time(p, cputime); + if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { + account_guest_time(p, cputime); + return; + } p->stime = cputime_add(p->stime, cputime); @@ -4404,19 +4347,11 @@ void scheduler_tick(void) int cpu = smp_processor_id(); struct rq *rq = cpu_rq(cpu); struct task_struct *curr = rq->curr; - u64 next_tick = rq->tick_timestamp + TICK_NSEC; + + sched_clock_tick(); spin_lock(&rq->lock); - __update_rq_clock(rq); - /* - * Let rq->clock advance by at least TICK_NSEC: - */ - if (unlikely(rq->clock < next_tick)) { - rq->clock = next_tick; - rq->clock_underflows++; - } - rq->tick_timestamp = rq->clock; - update_last_tick_seen(rq); + update_rq_clock(rq); update_cpu_load(rq); curr->sched_class->task_tick(rq, curr, 0); spin_unlock(&rq->lock); @@ -4570,7 +4505,7 @@ need_resched_nonpreemptible: * Do the rq-clock update outside the rq lock: */ local_irq_disable(); - __update_rq_clock(rq); + update_rq_clock(rq); spin_lock(&rq->lock); clear_tsk_need_resched(prev); @@ -4595,9 +4530,9 @@ need_resched_nonpreemptible: prev->sched_class->put_prev_task(rq, prev); next = pick_next_task(rq, prev); - sched_info_switch(prev, next); - if (likely(prev != next)) { + sched_info_switch(prev, next); + rq->nr_switches++; rq->curr = next; ++*switch_count; @@ -7755,7 +7690,7 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, { int i, j; - lock_doms_cur(); + mutex_lock(&sched_domains_mutex); /* always unregister in case we don't destroy any domains */ unregister_sched_domain_sysctl(); @@ -7804,7 +7739,7 @@ match2: register_sched_domain_sysctl(); - unlock_doms_cur(); + mutex_unlock(&sched_domains_mutex); } #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) @@ -7813,8 +7748,10 @@ int arch_reinit_sched_domains(void) int err; get_online_cpus(); + mutex_lock(&sched_domains_mutex); detach_destroy_domains(&cpu_online_map); err = arch_init_sched_domains(&cpu_online_map); + mutex_unlock(&sched_domains_mutex); put_online_cpus(); return err; @@ -7932,13 +7869,16 @@ void __init sched_init_smp(void) BUG_ON(sched_group_nodes_bycpu == NULL); #endif get_online_cpus(); + mutex_lock(&sched_domains_mutex); arch_init_sched_domains(&cpu_online_map); cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); if (cpus_empty(non_isolated_cpus)) cpu_set(smp_processor_id(), non_isolated_cpus); + mutex_unlock(&sched_domains_mutex); put_online_cpus(); /* XXX: Theoretical race here - CPU may be hotplugged now */ hotcpu_notifier(update_sched_domains, 0); + init_hrtick(); /* Move init over to a non-isolated CPU */ if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0) @@ -8025,7 +7965,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, se->my_q = cfs_rq; se->load.weight = tg->shares; - se->load.inv_weight = div64_64(1ULL<<32, se->load.weight); + se->load.inv_weight = 0; se->parent = parent; } #endif @@ -8149,8 +8089,6 @@ void __init sched_init(void) spin_lock_init(&rq->lock); lockdep_set_class(&rq->lock, &rq->rq_lock_key); rq->nr_running = 0; - rq->clock = 1; - update_last_tick_seen(rq); init_cfs_rq(&rq->cfs, rq); init_rt_rq(&rq->rt, rq); #ifdef CONFIG_FAIR_GROUP_SCHED @@ -8294,6 +8232,7 @@ EXPORT_SYMBOL(__might_sleep); static void normalize_task(struct rq *rq, struct task_struct *p) { int on_rq; + update_rq_clock(rq); on_rq = p->se.on_rq; if (on_rq) @@ -8325,7 +8264,6 @@ void normalize_rt_tasks(void) p->se.sleep_start = 0; p->se.block_start = 0; #endif - task_rq(p)->clock = 0; if (!rt_task(p)) { /* @@ -8692,7 +8630,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares) dequeue_entity(cfs_rq, se, 0); se->load.weight = shares; - se->load.inv_weight = div64_64((1ULL<<32), shares); + se->load.inv_weight = 0; if (on_rq) enqueue_entity(cfs_rq, se, 0); @@ -8722,13 +8660,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) if (!tg->se[0]) return -EINVAL; - /* - * A weight of 0 or 1 can cause arithmetics problems. - * (The default weight is 1024 - so there's no practical - * limitation from this.) - */ if (shares < MIN_SHARES) shares = MIN_SHARES; + else if (shares > MAX_SHARES) + shares = MAX_SHARES; mutex_lock(&shares_mutex); if (tg->shares == shares) @@ -8753,7 +8688,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) * force a rebalance */ cfs_rq_set_shares(tg->cfs_rq[i], 0); - set_se_shares(tg->se[i], shares/nr_cpu_ids); + set_se_shares(tg->se[i], shares); } /* @@ -8787,7 +8722,7 @@ static unsigned long to_ratio(u64 period, u64 runtime) if (runtime == RUNTIME_INF) return 1ULL << 16; - return div64_64(runtime << 16, period); + return div64_u64(runtime << 16, period); } #ifdef CONFIG_CGROUP_SCHED diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c new file mode 100644 index 00000000000..9c597e37f7d --- /dev/null +++ b/kernel/sched_clock.c @@ -0,0 +1,236 @@ +/* + * sched_clock for unstable cpu clocks + * + * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + * + * Based on code by: + * Ingo Molnar <mingo@redhat.com> + * Guillaume Chazarain <guichaz@gmail.com> + * + * Create a semi stable clock from a mixture of other events, including: + * - gtod + * - jiffies + * - sched_clock() + * - explicit idle events + * + * We use gtod as base and the unstable clock deltas. The deltas are filtered, + * making it monotonic and keeping it within an expected window. This window + * is set up using jiffies. + * + * Furthermore, explicit sleep and wakeup hooks allow us to account for time + * that is otherwise invisible (TSC gets stopped). + * + * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat + * consistent between cpus (never more than 1 jiffies difference). + */ +#include <linux/sched.h> +#include <linux/percpu.h> +#include <linux/spinlock.h> +#include <linux/ktime.h> +#include <linux/module.h> + + +#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK + +struct sched_clock_data { + /* + * Raw spinlock - this is a special case: this might be called + * from within instrumentation code so we dont want to do any + * instrumentation ourselves. + */ + raw_spinlock_t lock; + + unsigned long prev_jiffies; + u64 prev_raw; + u64 tick_raw; + u64 tick_gtod; + u64 clock; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); + +static inline struct sched_clock_data *this_scd(void) +{ + return &__get_cpu_var(sched_clock_data); +} + +static inline struct sched_clock_data *cpu_sdc(int cpu) +{ + return &per_cpu(sched_clock_data, cpu); +} + +void sched_clock_init(void) +{ + u64 ktime_now = ktime_to_ns(ktime_get()); + u64 now = 0; + int cpu; + + for_each_possible_cpu(cpu) { + struct sched_clock_data *scd = cpu_sdc(cpu); + + scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + scd->prev_jiffies = jiffies; + scd->prev_raw = now; + scd->tick_raw = now; + scd->tick_gtod = ktime_now; + scd->clock = ktime_now; + } +} + +/* + * update the percpu scd from the raw @now value + * + * - filter out backward motion + * - use jiffies to generate a min,max window to clip the raw values + */ +static void __update_sched_clock(struct sched_clock_data *scd, u64 now) +{ + unsigned long now_jiffies = jiffies; + long delta_jiffies = now_jiffies - scd->prev_jiffies; + u64 clock = scd->clock; + u64 min_clock, max_clock; + s64 delta = now - scd->prev_raw; + + WARN_ON_ONCE(!irqs_disabled()); + min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC; + + if (unlikely(delta < 0)) { + clock++; + goto out; + } + + max_clock = min_clock + TICK_NSEC; + + if (unlikely(clock + delta > max_clock)) { + if (clock < max_clock) + clock = max_clock; + else + clock++; + } else { + clock += delta; + } + + out: + if (unlikely(clock < min_clock)) + clock = min_clock; + + scd->prev_raw = now; + scd->prev_jiffies = now_jiffies; + scd->clock = clock; +} + +static void lock_double_clock(struct sched_clock_data *data1, + struct sched_clock_data *data2) +{ + if (data1 < data2) { + __raw_spin_lock(&data1->lock); + __raw_spin_lock(&data2->lock); + } else { + __raw_spin_lock(&data2->lock); + __raw_spin_lock(&data1->lock); + } +} + +u64 sched_clock_cpu(int cpu) +{ + struct sched_clock_data *scd = cpu_sdc(cpu); + u64 now, clock; + + WARN_ON_ONCE(!irqs_disabled()); + now = sched_clock(); + + if (cpu != raw_smp_processor_id()) { + /* + * in order to update a remote cpu's clock based on our + * unstable raw time rebase it against: + * tick_raw (offset between raw counters) + * tick_gotd (tick offset between cpus) + */ + struct sched_clock_data *my_scd = this_scd(); + + lock_double_clock(scd, my_scd); + + now -= my_scd->tick_raw; + now += scd->tick_raw; + + now -= my_scd->tick_gtod; + now += scd->tick_gtod; + + __raw_spin_unlock(&my_scd->lock); + } else { + __raw_spin_lock(&scd->lock); + } + + __update_sched_clock(scd, now); + clock = scd->clock; + + __raw_spin_unlock(&scd->lock); + + return clock; +} + +void sched_clock_tick(void) +{ + struct sched_clock_data *scd = this_scd(); + u64 now, now_gtod; + + WARN_ON_ONCE(!irqs_disabled()); + + now = sched_clock(); + now_gtod = ktime_to_ns(ktime_get()); + + __raw_spin_lock(&scd->lock); + __update_sched_clock(scd, now); + /* + * update tick_gtod after __update_sched_clock() because that will + * already observe 1 new jiffy; adding a new tick_gtod to that would + * increase the clock 2 jiffies. + */ + scd->tick_raw = now; + scd->tick_gtod = now_gtod; + __raw_spin_unlock(&scd->lock); +} + +/* + * We are going deep-idle (irqs are disabled): + */ +void sched_clock_idle_sleep_event(void) +{ + sched_clock_cpu(smp_processor_id()); +} +EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event); + +/* + * We just idled delta nanoseconds (called with irqs disabled): + */ +void sched_clock_idle_wakeup_event(u64 delta_ns) +{ + struct sched_clock_data *scd = this_scd(); + u64 now = sched_clock(); + + /* + * Override the previous timestamp and ignore all + * sched_clock() deltas that occured while we idled, + * and use the PM-provided delta_ns to advance the + * rq clock: + */ + __raw_spin_lock(&scd->lock); + scd->prev_raw = now; + scd->clock += delta_ns; + __raw_spin_unlock(&scd->lock); + + touch_softlockup_watchdog(); +} +EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); + +#endif + +/* + * Scheduler clock - returns current time in nanosec units. + * This is default implementation. + * Architectures and sub-architectures can override this. + */ +unsigned long long __attribute__((weak)) sched_clock(void) +{ + return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); +} diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 8a9498e7c83..5f06118fbc3 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -204,13 +204,6 @@ static void print_cpu(struct seq_file *m, int cpu) PN(next_balance); P(curr->pid); PN(clock); - PN(idle_clock); - PN(prev_clock_raw); - P(clock_warps); - P(clock_overflows); - P(clock_underflows); - P(clock_deep_idle_events); - PN(clock_max_delta); P(cpu_load[0]); P(cpu_load[1]); P(cpu_load[2]); @@ -357,8 +350,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) avg_per_cpu = p->se.sum_exec_runtime; if (p->se.nr_migrations) { - avg_per_cpu = div64_64(avg_per_cpu, - p->se.nr_migrations); + avg_per_cpu = div64_u64(avg_per_cpu, + p->se.nr_migrations); } else { avg_per_cpu = -1LL; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 89fa32b4edf..c863663d204 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -682,6 +682,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) * Update run-time statistics of the 'current'. */ update_curr(cfs_rq); + account_entity_enqueue(cfs_rq, se); if (wakeup) { place_entity(cfs_rq, se, 0); @@ -692,7 +693,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) check_spread(cfs_rq, se); if (se != cfs_rq->curr) __enqueue_entity(cfs_rq, se); - account_entity_enqueue(cfs_rq, se); } static void update_avg(u64 *avg, u64 sample) @@ -841,8 +841,10 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) * queued ticks are scheduled to match the slice, so don't bother * validating it and just reschedule. */ - if (queued) - return resched_task(rq_of(cfs_rq)->curr); + if (queued) { + resched_task(rq_of(cfs_rq)->curr); + return; + } /* * don't let the period tick interfere with the hrtick preemption */ @@ -957,7 +959,7 @@ static void yield_task_fair(struct rq *rq) return; if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { - __update_rq_clock(rq); + update_rq_clock(rq); /* * Update run-time statistics of the 'current'. */ @@ -1007,7 +1009,7 @@ static int wake_idle(int cpu, struct task_struct *p) * sibling runqueue info. This will avoid the checks and cache miss * penalities associated with that. */ - if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1) + if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1) return cpu; for_each_domain(cpu, sd) { @@ -1611,30 +1613,6 @@ static const struct sched_class fair_sched_class = { }; #ifdef CONFIG_SCHED_DEBUG -static void -print_cfs_rq_tasks(struct seq_file *m, struct cfs_rq *cfs_rq, int depth) -{ - struct sched_entity *se; - - if (!cfs_rq) - return; - - list_for_each_entry_rcu(se, &cfs_rq->tasks, group_node) { - int i; - - for (i = depth; i; i--) - seq_puts(m, " "); - - seq_printf(m, "%lu %s %lu\n", - se->load.weight, - entity_is_task(se) ? "T" : "G", - calc_delta_weight(SCHED_LOAD_SCALE, se) - ); - if (!entity_is_task(se)) - print_cfs_rq_tasks(m, group_cfs_rq(se), depth + 1); - } -} - static void print_cfs_stats(struct seq_file *m, int cpu) { struct cfs_rq *cfs_rq; @@ -1642,9 +1620,6 @@ static void print_cfs_stats(struct seq_file *m, int cpu) rcu_read_lock(); for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq) print_cfs_rq(m, cpu, cfs_rq); - - seq_printf(m, "\nWeight tree:\n"); - print_cfs_rq_tasks(m, &cpu_rq(cpu)->cfs, 1); rcu_read_unlock(); } #endif diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 2bcafa37563..3a4f92dbbe6 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -99,7 +99,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p, /* * Simple, special scheduling class for the per-CPU idle tasks: */ -const struct sched_class idle_sched_class = { +static const struct sched_class idle_sched_class = { /* .next is NULL */ /* no enqueue/yield_task for idle tasks */ diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index c2730a5a4f0..060e87b0cb1 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1098,11 +1098,14 @@ static void post_schedule_rt(struct rq *rq) } } - +/* + * If we are not running and we are not going to reschedule soon, we should + * try to push tasks away now + */ static void task_wake_up_rt(struct rq *rq, struct task_struct *p) { if (!task_running(rq, p) && - (p->prio >= rq->rt.highest_prio) && + !test_tsk_need_resched(rq->curr) && rq->rt.overloaded) push_rt_tasks(rq); } @@ -1309,7 +1312,7 @@ static void set_curr_task_rt(struct rq *rq) p->se.exec_start = rq->clock; } -const struct sched_class rt_sched_class = { +static const struct sched_class rt_sched_class = { .next = &fair_sched_class, .enqueue_task = enqueue_task_rt, .dequeue_task = dequeue_task_rt, diff --git a/kernel/signal.c b/kernel/signal.c index 64ad0ed1599..72bb4f51f96 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -39,11 +39,19 @@ static struct kmem_cache *sigqueue_cachep; +static int __sig_ignored(struct task_struct *t, int sig) +{ + void __user *handler; + + /* Is it explicitly or implicitly ignored? */ + + handler = t->sighand->action[sig - 1].sa.sa_handler; + return handler == SIG_IGN || + (handler == SIG_DFL && sig_kernel_ignore(sig)); +} static int sig_ignored(struct task_struct *t, int sig) { - void __user * handler; - /* * Tracers always want to know about signals.. */ @@ -58,10 +66,7 @@ static int sig_ignored(struct task_struct *t, int sig) if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) return 0; - /* Is it explicitly or implicitly ignored? */ - handler = t->sighand->action[sig-1].sa.sa_handler; - return handler == SIG_IGN || - (handler == SIG_DFL && sig_kernel_ignore(sig)); + return __sig_ignored(t, sig); } /* @@ -372,7 +377,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, */ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) { - int signr = 0; + int signr; /* We only dequeue private signals from ourselves, we don't let * signalfd steal them @@ -405,8 +410,12 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) } } } + recalc_sigpending(); - if (signr && unlikely(sig_kernel_stop(signr))) { + if (!signr) + return 0; + + if (unlikely(sig_kernel_stop(signr))) { /* * Set a marker that we have dequeued a stop signal. Our * caller might release the siglock and then the pending @@ -422,9 +431,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; } - if (signr && - ((info->si_code & __SI_MASK) == __SI_TIMER) && - info->si_sys_private) { + if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { /* * Release the siglock to ensure proper locking order * of timer locks outside of siglocks. Note, we leave @@ -526,21 +533,34 @@ static int rm_from_queue(unsigned long mask, struct sigpending *s) static int check_kill_permission(int sig, struct siginfo *info, struct task_struct *t) { - int error = -EINVAL; + struct pid *sid; + int error; + if (!valid_signal(sig)) - return error; + return -EINVAL; - if (info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) { - error = audit_signal_info(sig, t); /* Let audit system see the signal */ - if (error) - return error; - error = -EPERM; - if (((sig != SIGCONT) || - (task_session_nr(current) != task_session_nr(t))) - && (current->euid ^ t->suid) && (current->euid ^ t->uid) - && (current->uid ^ t->suid) && (current->uid ^ t->uid) - && !capable(CAP_KILL)) + if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info))) + return 0; + + error = audit_signal_info(sig, t); /* Let audit system see the signal */ + if (error) return error; + + if ((current->euid ^ t->suid) && (current->euid ^ t->uid) && + (current->uid ^ t->suid) && (current->uid ^ t->uid) && + !capable(CAP_KILL)) { + switch (sig) { + case SIGCONT: + sid = task_session(t); + /* + * We don't return the error if sid == NULL. The + * task was unhashed, the caller must notice this. + */ + if (!sid || sid == task_session(current)) + break; + default: + return -EPERM; + } } return security_task_kill(t, info, sig, 0); @@ -550,62 +570,44 @@ static int check_kill_permission(int sig, struct siginfo *info, static void do_notify_parent_cldstop(struct task_struct *tsk, int why); /* - * Handle magic process-wide effects of stop/continue signals. - * Unlike the signal actions, these happen immediately at signal-generation + * Handle magic process-wide effects of stop/continue signals. Unlike + * the signal actions, these happen immediately at signal-generation * time regardless of blocking, ignoring, or handling. This does the * actual continuing for SIGCONT, but not the actual stopping for stop - * signals. The process stop is done as a signal action for SIG_DFL. + * signals. The process stop is done as a signal action for SIG_DFL. + * + * Returns true if the signal should be actually delivered, otherwise + * it should be dropped. */ -static void handle_stop_signal(int sig, struct task_struct *p) +static int prepare_signal(int sig, struct task_struct *p) { + struct signal_struct *signal = p->signal; struct task_struct *t; - if (p->signal->flags & SIGNAL_GROUP_EXIT) + if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) { /* - * The process is in the middle of dying already. + * The process is in the middle of dying, nothing to do. */ - return; - - if (sig_kernel_stop(sig)) { + } else if (sig_kernel_stop(sig)) { /* * This is a stop signal. Remove SIGCONT from all queues. */ - rm_from_queue(sigmask(SIGCONT), &p->signal->shared_pending); + rm_from_queue(sigmask(SIGCONT), &signal->shared_pending); t = p; do { rm_from_queue(sigmask(SIGCONT), &t->pending); - t = next_thread(t); - } while (t != p); + } while_each_thread(p, t); } else if (sig == SIGCONT) { + unsigned int why; /* * Remove all stop signals from all queues, * and wake all threads. */ - if (unlikely(p->signal->group_stop_count > 0)) { - /* - * There was a group stop in progress. We'll - * pretend it finished before we got here. We are - * obliged to report it to the parent: if the - * SIGSTOP happened "after" this SIGCONT, then it - * would have cleared this pending SIGCONT. If it - * happened "before" this SIGCONT, then the parent - * got the SIGCHLD about the stop finishing before - * the continue happened. We do the notification - * now, and it's as if the stop had finished and - * the SIGCHLD was pending on entry to this kill. - */ - p->signal->group_stop_count = 0; - p->signal->flags = SIGNAL_STOP_CONTINUED; - spin_unlock(&p->sighand->siglock); - do_notify_parent_cldstop(p, CLD_STOPPED); - spin_lock(&p->sighand->siglock); - } - rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); + rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending); t = p; do { unsigned int state; rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); - /* * If there is a handler for SIGCONT, we must make * sure that no thread returns to user mode before @@ -615,7 +617,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) * running the handler. With the TIF_SIGPENDING * flag set, the thread will pause and acquire the * siglock that we hold now and until we've queued - * the pending signal. + * the pending signal. * * Wake up the stopped thread _after_ setting * TIF_SIGPENDING @@ -626,49 +628,163 @@ static void handle_stop_signal(int sig, struct task_struct *p) state |= TASK_INTERRUPTIBLE; } wake_up_state(t, state); + } while_each_thread(p, t); - t = next_thread(t); - } while (t != p); + /* + * Notify the parent with CLD_CONTINUED if we were stopped. + * + * If we were in the middle of a group stop, we pretend it + * was already finished, and then continued. Since SIGCHLD + * doesn't queue we report only CLD_STOPPED, as if the next + * CLD_CONTINUED was dropped. + */ + why = 0; + if (signal->flags & SIGNAL_STOP_STOPPED) + why |= SIGNAL_CLD_CONTINUED; + else if (signal->group_stop_count) + why |= SIGNAL_CLD_STOPPED; - if (p->signal->flags & SIGNAL_STOP_STOPPED) { + if (why) { /* - * We were in fact stopped, and are now continued. - * Notify the parent with CLD_CONTINUED. + * The first thread which returns from finish_stop() + * will take ->siglock, notice SIGNAL_CLD_MASK, and + * notify its parent. See get_signal_to_deliver(). */ - p->signal->flags = SIGNAL_STOP_CONTINUED; - p->signal->group_exit_code = 0; - spin_unlock(&p->sighand->siglock); - do_notify_parent_cldstop(p, CLD_CONTINUED); - spin_lock(&p->sighand->siglock); + signal->flags = why | SIGNAL_STOP_CONTINUED; + signal->group_stop_count = 0; + signal->group_exit_code = 0; } else { /* * We are not stopped, but there could be a stop * signal in the middle of being processed after * being removed from the queue. Clear that too. */ - p->signal->flags = 0; + signal->flags &= ~SIGNAL_STOP_DEQUEUED; } - } else if (sig == SIGKILL) { + } + + return !sig_ignored(p, sig); +} + +/* + * Test if P wants to take SIG. After we've checked all threads with this, + * it's equivalent to finding no threads not blocking SIG. Any threads not + * blocking SIG were ruled out because they are not running and already + * have pending signals. Such threads will dequeue from the shared queue + * as soon as they're available, so putting the signal on the shared queue + * will be equivalent to sending it to one such thread. + */ +static inline int wants_signal(int sig, struct task_struct *p) +{ + if (sigismember(&p->blocked, sig)) + return 0; + if (p->flags & PF_EXITING) + return 0; + if (sig == SIGKILL) + return 1; + if (task_is_stopped_or_traced(p)) + return 0; + return task_curr(p) || !signal_pending(p); +} + +static void complete_signal(int sig, struct task_struct *p, int group) +{ + struct signal_struct *signal = p->signal; + struct task_struct *t; + + /* + * Now find a thread we can wake up to take the signal off the queue. + * + * If the main thread wants the signal, it gets first crack. + * Probably the least surprising to the average bear. + */ + if (wants_signal(sig, p)) + t = p; + else if (!group || thread_group_empty(p)) + /* + * There is just one thread and it does not need to be woken. + * It will dequeue unblocked signals before it runs again. + */ + return; + else { /* - * Make sure that any pending stop signal already dequeued - * is undone by the wakeup for SIGKILL. + * Otherwise try to find a suitable thread. */ - p->signal->flags = 0; + t = signal->curr_target; + while (!wants_signal(sig, t)) { + t = next_thread(t); + if (t == signal->curr_target) + /* + * No thread needs to be woken. + * Any eligible threads will see + * the signal in the queue soon. + */ + return; + } + signal->curr_target = t; } + + /* + * Found a killable thread. If the signal will be fatal, + * then start taking the whole group down immediately. + */ + if (sig_fatal(p, sig) && + !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && + !sigismember(&t->real_blocked, sig) && + (sig == SIGKILL || !(t->ptrace & PT_PTRACED))) { + /* + * This signal will be fatal to the whole group. + */ + if (!sig_kernel_coredump(sig)) { + /* + * Start a group exit and wake everybody up. + * This way we don't have other threads + * running and doing things after a slower + * thread has the fatal signal pending. + */ + signal->flags = SIGNAL_GROUP_EXIT; + signal->group_exit_code = sig; + signal->group_stop_count = 0; + t = p; + do { + sigaddset(&t->pending.signal, SIGKILL); + signal_wake_up(t, 1); + } while_each_thread(p, t); + return; + } + } + + /* + * The signal is already in the shared-pending queue. + * Tell the chosen thread to wake up and dequeue it. + */ + signal_wake_up(t, sig == SIGKILL); + return; +} + +static inline int legacy_queue(struct sigpending *signals, int sig) +{ + return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); } static int send_signal(int sig, struct siginfo *info, struct task_struct *t, - struct sigpending *signals) + int group) { - struct sigqueue * q = NULL; - int ret = 0; + struct sigpending *pending; + struct sigqueue *q; + + assert_spin_locked(&t->sighand->siglock); + if (!prepare_signal(sig, t)) + return 0; + pending = group ? &t->signal->shared_pending : &t->pending; /* - * Deliver the signal to listening signalfds. This must be called - * with the sighand lock held. + * Short-circuit ignored signals and support queuing + * exactly one non-rt signal, so that we can get more + * detailed information about the cause of the signal. */ - signalfd_notify(t, sig); - + if (legacy_queue(pending, sig)) + return 0; /* * fast-pathed signals for kernel-internal things like SIGSTOP * or SIGKILL. @@ -688,7 +804,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, (is_si_special(info) || info->si_code >= 0))); if (q) { - list_add_tail(&q->list, &signals->list); + list_add_tail(&q->list, &pending->list); switch ((unsigned long) info) { case (unsigned long) SEND_SIG_NOINFO: q->info.si_signo = sig; @@ -718,13 +834,12 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, } out_set: - sigaddset(&signals->signal, sig); - return ret; + signalfd_notify(t, sig); + sigaddset(&pending->signal, sig); + complete_signal(sig, t, group); + return 0; } -#define LEGACY_QUEUE(sigptr, sig) \ - (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) - int print_fatal_signals; static void print_fatal_signal(struct pt_regs *regs, int signr) @@ -757,29 +872,16 @@ static int __init setup_print_fatal_signals(char *str) __setup("print-fatal-signals=", setup_print_fatal_signals); +int +__group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) +{ + return send_signal(sig, info, p, 1); +} + static int specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) { - int ret = 0; - - BUG_ON(!irqs_disabled()); - assert_spin_locked(&t->sighand->siglock); - - /* Short-circuit ignored signals. */ - if (sig_ignored(t, sig)) - goto out; - - /* Support queueing exactly one non-rt signal, so that we - can get more detailed information about the cause of - the signal. */ - if (LEGACY_QUEUE(&t->pending, sig)) - goto out; - - ret = send_signal(sig, info, t, &t->pending); - if (!ret && !sigismember(&t->blocked, sig)) - signal_wake_up(t, sig == SIGKILL); -out: - return ret; + return send_signal(sig, info, t, 0); } /* @@ -790,7 +892,8 @@ out: * since we do not want to have a signal handler that was blocked * be invoked when user space had explicitly blocked it. * - * We don't want to have recursive SIGSEGV's etc, for example. + * We don't want to have recursive SIGSEGV's etc, for example, + * that is why we also clear SIGNAL_UNKILLABLE. */ int force_sig_info(int sig, struct siginfo *info, struct task_struct *t) @@ -810,6 +913,8 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t) recalc_sigpending_and_wake(t); } } + if (action->sa.sa_handler == SIG_DFL) + t->signal->flags &= ~SIGNAL_UNKILLABLE; ret = specific_send_sig_info(sig, info, t); spin_unlock_irqrestore(&t->sighand->siglock, flags); @@ -823,134 +928,6 @@ force_sig_specific(int sig, struct task_struct *t) } /* - * Test if P wants to take SIG. After we've checked all threads with this, - * it's equivalent to finding no threads not blocking SIG. Any threads not - * blocking SIG were ruled out because they are not running and already - * have pending signals. Such threads will dequeue from the shared queue - * as soon as they're available, so putting the signal on the shared queue - * will be equivalent to sending it to one such thread. - */ -static inline int wants_signal(int sig, struct task_struct *p) -{ - if (sigismember(&p->blocked, sig)) - return 0; - if (p->flags & PF_EXITING) - return 0; - if (sig == SIGKILL) - return 1; - if (task_is_stopped_or_traced(p)) - return 0; - return task_curr(p) || !signal_pending(p); -} - -static void -__group_complete_signal(int sig, struct task_struct *p) -{ - struct task_struct *t; - - /* - * Now find a thread we can wake up to take the signal off the queue. - * - * If the main thread wants the signal, it gets first crack. - * Probably the least surprising to the average bear. - */ - if (wants_signal(sig, p)) - t = p; - else if (thread_group_empty(p)) - /* - * There is just one thread and it does not need to be woken. - * It will dequeue unblocked signals before it runs again. - */ - return; - else { - /* - * Otherwise try to find a suitable thread. - */ - t = p->signal->curr_target; - if (t == NULL) - /* restart balancing at this thread */ - t = p->signal->curr_target = p; - - while (!wants_signal(sig, t)) { - t = next_thread(t); - if (t == p->signal->curr_target) - /* - * No thread needs to be woken. - * Any eligible threads will see - * the signal in the queue soon. - */ - return; - } - p->signal->curr_target = t; - } - - /* - * Found a killable thread. If the signal will be fatal, - * then start taking the whole group down immediately. - */ - if (sig_fatal(p, sig) && !(p->signal->flags & SIGNAL_GROUP_EXIT) && - !sigismember(&t->real_blocked, sig) && - (sig == SIGKILL || !(t->ptrace & PT_PTRACED))) { - /* - * This signal will be fatal to the whole group. - */ - if (!sig_kernel_coredump(sig)) { - /* - * Start a group exit and wake everybody up. - * This way we don't have other threads - * running and doing things after a slower - * thread has the fatal signal pending. - */ - p->signal->flags = SIGNAL_GROUP_EXIT; - p->signal->group_exit_code = sig; - p->signal->group_stop_count = 0; - t = p; - do { - sigaddset(&t->pending.signal, SIGKILL); - signal_wake_up(t, 1); - } while_each_thread(p, t); - return; - } - } - - /* - * The signal is already in the shared-pending queue. - * Tell the chosen thread to wake up and dequeue it. - */ - signal_wake_up(t, sig == SIGKILL); - return; -} - -int -__group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) -{ - int ret = 0; - - assert_spin_locked(&p->sighand->siglock); - handle_stop_signal(sig, p); - - /* Short-circuit ignored signals. */ - if (sig_ignored(p, sig)) - return ret; - - if (LEGACY_QUEUE(&p->signal->shared_pending, sig)) - /* This is a non-RT signal and we already have one queued. */ - return ret; - - /* - * Put this signal on the shared-pending queue, or fail with EAGAIN. - * We always use the shared queue for process-wide signals, - * to avoid several races. - */ - ret = send_signal(sig, info, p, &p->signal->shared_pending); - if (unlikely(ret)) - return ret; - - __group_complete_signal(sig, p); - return 0; -} - -/* * Nuke all other threads in the group. */ void zap_other_threads(struct task_struct *p) @@ -978,13 +955,11 @@ int __fatal_signal_pending(struct task_struct *tsk) } EXPORT_SYMBOL(__fatal_signal_pending); -/* - * Must be called under rcu_read_lock() or with tasklist_lock read-held. - */ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) { struct sighand_struct *sighand; + rcu_read_lock(); for (;;) { sighand = rcu_dereference(tsk->sighand); if (unlikely(sighand == NULL)) @@ -995,6 +970,7 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long break; spin_unlock_irqrestore(&sighand->siglock, *flags); } + rcu_read_unlock(); return sighand; } @@ -1043,9 +1019,6 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) struct task_struct *p; rcu_read_lock(); - if (unlikely(sig_needs_tasklist(sig))) - read_lock(&tasklist_lock); - retry: p = pid_task(pid, PIDTYPE_PID); if (p) { @@ -1059,10 +1032,8 @@ retry: */ goto retry; } - - if (unlikely(sig_needs_tasklist(sig))) - read_unlock(&tasklist_lock); rcu_read_unlock(); + return error; } @@ -1159,8 +1130,7 @@ static int kill_something_info(int sig, struct siginfo *info, int pid) */ /* - * These two are the most common entry points. They send a signal - * just to the specific thread. + * The caller must ensure the task can't exit. */ int send_sig_info(int sig, struct siginfo *info, struct task_struct *p) @@ -1175,17 +1145,9 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p) if (!valid_signal(sig)) return -EINVAL; - /* - * We need the tasklist lock even for the specific - * thread case (when we don't need to follow the group - * lists) in order to avoid races with "p->sighand" - * going away or changing from under us. - */ - read_lock(&tasklist_lock); spin_lock_irqsave(&p->sighand->siglock, flags); ret = specific_send_sig_info(sig, info, p); spin_unlock_irqrestore(&p->sighand->siglock, flags); - read_unlock(&tasklist_lock); return ret; } @@ -1291,28 +1253,24 @@ void sigqueue_free(struct sigqueue *q) __sigqueue_free(q); } -int send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) +int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) { + int sig = q->info.si_signo; + struct sigpending *pending; unsigned long flags; - int ret = 0; + int ret; BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); - /* - * The rcu based delayed sighand destroy makes it possible to - * run this without tasklist lock held. The task struct itself - * cannot go away as create_timer did get_task_struct(). - * - * We return -1, when the task is marked exiting, so - * posix_timer_event can redirect it to the group leader - */ - rcu_read_lock(); + ret = -1; + if (!likely(lock_task_sighand(t, &flags))) + goto ret; - if (!likely(lock_task_sighand(p, &flags))) { - ret = -1; - goto out_err; - } + ret = 1; /* the signal is ignored */ + if (!prepare_signal(sig, t)) + goto out; + ret = 0; if (unlikely(!list_empty(&q->list))) { /* * If an SI_TIMER entry is already queue just increment @@ -1322,77 +1280,15 @@ int send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) q->info.si_overrun++; goto out; } - /* Short-circuit ignored signals. */ - if (sig_ignored(p, sig)) { - ret = 1; - goto out; - } - /* - * Deliver the signal to listening signalfds. This must be called - * with the sighand lock held. - */ - signalfd_notify(p, sig); - - list_add_tail(&q->list, &p->pending.list); - sigaddset(&p->pending.signal, sig); - if (!sigismember(&p->blocked, sig)) - signal_wake_up(p, sig == SIGKILL); - -out: - unlock_task_sighand(p, &flags); -out_err: - rcu_read_unlock(); - - return ret; -} - -int -send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) -{ - unsigned long flags; - int ret = 0; - - BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); - - read_lock(&tasklist_lock); - /* Since it_lock is held, p->sighand cannot be NULL. */ - spin_lock_irqsave(&p->sighand->siglock, flags); - handle_stop_signal(sig, p); - - /* Short-circuit ignored signals. */ - if (sig_ignored(p, sig)) { - ret = 1; - goto out; - } - if (unlikely(!list_empty(&q->list))) { - /* - * If an SI_TIMER entry is already queue just increment - * the overrun count. Other uses should not try to - * send the signal multiple times. - */ - BUG_ON(q->info.si_code != SI_TIMER); - q->info.si_overrun++; - goto out; - } - /* - * Deliver the signal to listening signalfds. This must be called - * with the sighand lock held. - */ - signalfd_notify(p, sig); - - /* - * Put this signal on the shared-pending queue. - * We always use the shared queue for process-wide signals, - * to avoid several races. - */ - list_add_tail(&q->list, &p->signal->shared_pending.list); - sigaddset(&p->signal->shared_pending.signal, sig); - - __group_complete_signal(sig, p); + signalfd_notify(t, sig); + pending = group ? &t->signal->shared_pending : &t->pending; + list_add_tail(&q->list, &pending->list); + sigaddset(&pending->signal, sig); + complete_signal(sig, t, group); out: - spin_unlock_irqrestore(&p->sighand->siglock, flags); - read_unlock(&tasklist_lock); + unlock_task_sighand(t, &flags); +ret: return ret; } @@ -1723,8 +1619,9 @@ static int do_signal_stop(int signr) } else { struct task_struct *t; - if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || - unlikely(sig->group_exit_task)) + if (unlikely((sig->flags & (SIGNAL_STOP_DEQUEUED | SIGNAL_UNKILLABLE)) + != SIGNAL_STOP_DEQUEUED) || + unlikely(signal_group_exit(sig))) return 0; /* * There is no group stop already in progress. @@ -1799,8 +1696,9 @@ static int ptrace_signal(int signr, siginfo_t *info, int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie) { - sigset_t *mask = ¤t->blocked; - int signr = 0; + struct sighand_struct *sighand = current->sighand; + struct signal_struct *signal = current->signal; + int signr; relock: /* @@ -1811,16 +1709,32 @@ relock: */ try_to_freeze(); - spin_lock_irq(¤t->sighand->siglock); + spin_lock_irq(&sighand->siglock); + /* + * Every stopped thread goes here after wakeup. Check to see if + * we should notify the parent, prepare_signal(SIGCONT) encodes + * the CLD_ si_code into SIGNAL_CLD_MASK bits. + */ + if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { + int why = (signal->flags & SIGNAL_STOP_CONTINUED) + ? CLD_CONTINUED : CLD_STOPPED; + signal->flags &= ~SIGNAL_CLD_MASK; + spin_unlock_irq(&sighand->siglock); + + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current->group_leader, why); + read_unlock(&tasklist_lock); + goto relock; + } + for (;;) { struct k_sigaction *ka; - if (unlikely(current->signal->group_stop_count > 0) && + if (unlikely(signal->group_stop_count > 0) && do_signal_stop(0)) goto relock; - signr = dequeue_signal(current, mask, info); - + signr = dequeue_signal(current, ¤t->blocked, info); if (!signr) break; /* will return 0 */ @@ -1830,7 +1744,7 @@ relock: continue; } - ka = ¤t->sighand->action[signr-1]; + ka = &sighand->action[signr-1]; if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; if (ka->sa.sa_handler != SIG_DFL) { @@ -1852,7 +1766,8 @@ relock: /* * Global init gets no signals it doesn't want. */ - if (is_global_init(current)) + if (unlikely(signal->flags & SIGNAL_UNKILLABLE) && + !signal_group_exit(signal)) continue; if (sig_kernel_stop(signr)) { @@ -1867,14 +1782,14 @@ relock: * We need to check for that and bail out if necessary. */ if (signr != SIGSTOP) { - spin_unlock_irq(¤t->sighand->siglock); + spin_unlock_irq(&sighand->siglock); /* signals can be posted during this window */ if (is_current_pgrp_orphaned()) goto relock; - spin_lock_irq(¤t->sighand->siglock); + spin_lock_irq(&sighand->siglock); } if (likely(do_signal_stop(signr))) { @@ -1889,15 +1804,16 @@ relock: continue; } - spin_unlock_irq(¤t->sighand->siglock); + spin_unlock_irq(&sighand->siglock); /* * Anything else is fatal, maybe with a core dump. */ current->flags |= PF_SIGNALED; - if ((signr != SIGKILL) && print_fatal_signals) - print_fatal_signal(regs, signr); + if (sig_kernel_coredump(signr)) { + if (print_fatal_signals) + print_fatal_signal(regs, signr); /* * If it was able to dump core, this kills all * other threads in the group and synchronizes with @@ -1915,7 +1831,7 @@ relock: do_group_exit(signr); /* NOTREACHED */ } - spin_unlock_irq(¤t->sighand->siglock); + spin_unlock_irq(&sighand->siglock); return signr; } @@ -2259,6 +2175,7 @@ static int do_tkill(int tgid, int pid, int sig) int error; struct siginfo info; struct task_struct *p; + unsigned long flags; error = -ESRCH; info.si_signo = sig; @@ -2267,22 +2184,24 @@ static int do_tkill(int tgid, int pid, int sig) info.si_pid = task_tgid_vnr(current); info.si_uid = current->uid; - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_task_by_vpid(pid); if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { error = check_kill_permission(sig, &info, p); /* * The null signal is a permissions and process existence * probe. No signal is actually delivered. + * + * If lock_task_sighand() fails we pretend the task dies + * after receiving the signal. The window is tiny, and the + * signal is private anyway. */ - if (!error && sig && p->sighand) { - spin_lock_irq(&p->sighand->siglock); - handle_stop_signal(sig, p); + if (!error && sig && lock_task_sighand(p, &flags)) { error = specific_send_sig_info(sig, &info, p); - spin_unlock_irq(&p->sighand->siglock); + unlock_task_sighand(p, &flags); } } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return error; } @@ -2339,13 +2258,14 @@ sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo) int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) { + struct task_struct *t = current; struct k_sigaction *k; sigset_t mask; if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig))) return -EINVAL; - k = ¤t->sighand->action[sig-1]; + k = &t->sighand->action[sig-1]; spin_lock_irq(¤t->sighand->siglock); if (oact) @@ -2366,9 +2286,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) * (for example, SIGCHLD), shall cause the pending signal to * be discarded, whether or not it is blocked" */ - if (act->sa.sa_handler == SIG_IGN || - (act->sa.sa_handler == SIG_DFL && sig_kernel_ignore(sig))) { - struct task_struct *t = current; + if (__sig_ignored(t, sig)) { sigemptyset(&mask); sigaddset(&mask, sig); rm_from_queue_full(&mask, &t->signal->shared_pending); @@ -2623,7 +2541,7 @@ asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) current->state = TASK_INTERRUPTIBLE; schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); return -ERESTARTNOHAND; } #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */ diff --git a/kernel/softirq.c b/kernel/softirq.c index 3c44956ee7e..36e06174004 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -589,16 +589,20 @@ static void takeover_tasklets(unsigned int cpu) local_irq_disable(); /* Find end, append list for that CPU. */ - *__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).head; - __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail; - per_cpu(tasklet_vec, cpu).head = NULL; - per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; + if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) { + *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head; + __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail; + per_cpu(tasklet_vec, cpu).head = NULL; + per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; + } raise_softirq_irqoff(TASKLET_SOFTIRQ); - *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head; - __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail; - per_cpu(tasklet_hi_vec, cpu).head = NULL; - per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head; + if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) { + *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head; + __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail; + per_cpu(tasklet_hi_vec, cpu).head = NULL; + per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head; + } raise_softirq_irqoff(HI_SOFTIRQ); local_irq_enable(); diff --git a/kernel/sys.c b/kernel/sys.c index e423d0d9e6f..895d2d4c949 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -978,8 +978,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) goto out; if (task_pgrp(p) != pgrp) { - detach_pid(p, PIDTYPE_PGID); - attach_pid(p, PIDTYPE_PGID, pgrp); + change_pid(p, PIDTYPE_PGID, pgrp); set_task_pgrp(p, pid_nr(pgrp)); } @@ -992,54 +991,67 @@ out: asmlinkage long sys_getpgid(pid_t pid) { + struct task_struct *p; + struct pid *grp; + int retval; + + rcu_read_lock(); if (!pid) - return task_pgrp_vnr(current); + grp = task_pgrp(current); else { - int retval; - struct task_struct *p; - - read_lock(&tasklist_lock); - p = find_task_by_vpid(pid); retval = -ESRCH; - if (p) { - retval = security_task_getpgid(p); - if (!retval) - retval = task_pgrp_vnr(p); - } - read_unlock(&tasklist_lock); - return retval; + p = find_task_by_vpid(pid); + if (!p) + goto out; + grp = task_pgrp(p); + if (!grp) + goto out; + + retval = security_task_getpgid(p); + if (retval) + goto out; } + retval = pid_vnr(grp); +out: + rcu_read_unlock(); + return retval; } #ifdef __ARCH_WANT_SYS_GETPGRP asmlinkage long sys_getpgrp(void) { - /* SMP - assuming writes are word atomic this is fine */ - return task_pgrp_vnr(current); + return sys_getpgid(0); } #endif asmlinkage long sys_getsid(pid_t pid) { + struct task_struct *p; + struct pid *sid; + int retval; + + rcu_read_lock(); if (!pid) - return task_session_vnr(current); + sid = task_session(current); else { - int retval; - struct task_struct *p; - - rcu_read_lock(); - p = find_task_by_vpid(pid); retval = -ESRCH; - if (p) { - retval = security_task_getsid(p); - if (!retval) - retval = task_session_vnr(p); - } - rcu_read_unlock(); - return retval; + p = find_task_by_vpid(pid); + if (!p) + goto out; + sid = task_session(p); + if (!sid) + goto out; + + retval = security_task_getsid(p); + if (retval) + goto out; } + retval = pid_vnr(sid); +out: + rcu_read_unlock(); + return retval; } asmlinkage long sys_setsid(void) @@ -1572,11 +1584,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) goto out; } - rcu_read_lock(); - if (!lock_task_sighand(p, &flags)) { - rcu_read_unlock(); + if (!lock_task_sighand(p, &flags)) return; - } switch (who) { case RUSAGE_BOTH: @@ -1612,9 +1621,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) default: BUG(); } - unlock_task_sighand(p, &flags); - rcu_read_unlock(); out: cputime_to_timeval(utime, &r->ru_utime); diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 07e86a82807..4a23517169a 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -183,7 +183,7 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, if (!tsk) { rcu_read_lock(); - tsk = find_task_by_pid(pid); + tsk = find_task_by_vpid(pid); if (tsk) get_task_struct(tsk); rcu_read_unlock(); @@ -230,7 +230,7 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, */ rcu_read_lock(); if (!first) - first = find_task_by_pid(tgid); + first = find_task_by_vpid(tgid); if (!first || !lock_task_sighand(first, &flags)) goto out; @@ -547,7 +547,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) if (!stats) goto err; - rc = fill_pid(tsk->pid, tsk, stats); + rc = fill_pid(-1, tsk, stats); if (rc < 0) goto err; diff --git a/kernel/time.c b/kernel/time.c index 86729042e4c..6a08660b4fa 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -36,6 +36,7 @@ #include <linux/security.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/math64.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -245,7 +246,7 @@ unsigned int inline jiffies_to_msecs(const unsigned long j) return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); #else # if BITS_PER_LONG == 32 - return ((u64)HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32; + return (HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32; # else return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN; # endif @@ -261,7 +262,7 @@ unsigned int inline jiffies_to_usecs(const unsigned long j) return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC); #else # if BITS_PER_LONG == 32 - return ((u64)HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32; + return (HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32; # else return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN; # endif @@ -391,13 +392,17 @@ EXPORT_SYMBOL(set_normalized_timespec); struct timespec ns_to_timespec(const s64 nsec) { struct timespec ts; + s32 rem; if (!nsec) return (struct timespec) {0, 0}; - ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec); - if (unlikely(nsec < 0)) - set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec); + ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem); + if (unlikely(rem < 0)) { + ts.tv_sec--; + rem += NSEC_PER_SEC; + } + ts.tv_nsec = rem; return ts; } @@ -471,7 +476,7 @@ unsigned long msecs_to_jiffies(const unsigned int m) if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) return MAX_JIFFY_OFFSET; - return ((u64)MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) + return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) >> MSEC_TO_HZ_SHR32; #endif } @@ -486,7 +491,7 @@ unsigned long usecs_to_jiffies(const unsigned int u) #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) return u * (HZ / USEC_PER_SEC); #else - return ((u64)USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32) + return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32) >> USEC_TO_HZ_SHR32; #endif } @@ -527,8 +532,10 @@ jiffies_to_timespec(const unsigned long jiffies, struct timespec *value) * Convert jiffies to nanoseconds and separate with * one divide. */ - u64 nsec = (u64)jiffies * TICK_NSEC; - value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_nsec); + u32 rem; + value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC, + NSEC_PER_SEC, &rem); + value->tv_nsec = rem; } EXPORT_SYMBOL(jiffies_to_timespec); @@ -566,12 +573,11 @@ void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) * Convert jiffies to nanoseconds and separate with * one divide. */ - u64 nsec = (u64)jiffies * TICK_NSEC; - long tv_usec; + u32 rem; - value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &tv_usec); - tv_usec /= NSEC_PER_USEC; - value->tv_usec = tv_usec; + value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC, + NSEC_PER_SEC, &rem); + value->tv_usec = rem / NSEC_PER_USEC; } EXPORT_SYMBOL(jiffies_to_timeval); @@ -587,9 +593,7 @@ clock_t jiffies_to_clock_t(long x) return x / (HZ / USER_HZ); # endif #else - u64 tmp = (u64)x * TICK_NSEC; - do_div(tmp, (NSEC_PER_SEC / USER_HZ)); - return (long)tmp; + return div_u64((u64)x * TICK_NSEC, NSEC_PER_SEC / USER_HZ); #endif } EXPORT_SYMBOL(jiffies_to_clock_t); @@ -601,16 +605,12 @@ unsigned long clock_t_to_jiffies(unsigned long x) return ~0UL; return x * (HZ / USER_HZ); #else - u64 jif; - /* Don't worry about loss of precision here .. */ if (x >= ~0UL / HZ * USER_HZ) return ~0UL; /* .. but do try to contain it here */ - jif = x * (u64) HZ; - do_div(jif, USER_HZ); - return jif; + return div_u64((u64)x * HZ, USER_HZ); #endif } EXPORT_SYMBOL(clock_t_to_jiffies); @@ -619,10 +619,9 @@ u64 jiffies_64_to_clock_t(u64 x) { #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 # if HZ < USER_HZ - x *= USER_HZ; - do_div(x, HZ); + x = div_u64(x * USER_HZ, HZ); # elif HZ > USER_HZ - do_div(x, HZ / USER_HZ); + x = div_u64(x, HZ / USER_HZ); # else /* Nothing to do */ # endif @@ -632,8 +631,7 @@ u64 jiffies_64_to_clock_t(u64 x) * but even this doesn't overflow in hundreds of years * in 64 bits, so.. */ - x *= TICK_NSEC; - do_div(x, (NSEC_PER_SEC / USER_HZ)); + x = div_u64(x * TICK_NSEC, (NSEC_PER_SEC / USER_HZ)); #endif return x; } @@ -642,21 +640,17 @@ EXPORT_SYMBOL(jiffies_64_to_clock_t); u64 nsec_to_clock_t(u64 x) { #if (NSEC_PER_SEC % USER_HZ) == 0 - do_div(x, (NSEC_PER_SEC / USER_HZ)); + return div_u64(x, NSEC_PER_SEC / USER_HZ); #elif (USER_HZ % 512) == 0 - x *= USER_HZ/512; - do_div(x, (NSEC_PER_SEC / 512)); + return div_u64(x * USER_HZ / 512, NSEC_PER_SEC / 512); #else /* * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024, * overflow after 64.99 years. * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ... */ - x *= 9; - do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (USER_HZ/2)) / - USER_HZ)); + return div_u64(x * 9, (9ull * NSEC_PER_SEC + (USER_HZ / 2)) / USER_HZ); #endif - return x; } #if (BITS_PER_LONG < 64) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 73961f35fdc..dadde5361f3 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -471,10 +471,10 @@ sysfs_show_available_clocksources(struct sys_device *dev, char *buf) /* * Sysfs setup bits: */ -static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources, +static SYSDEV_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, sysfs_override_clocksource); -static SYSDEV_ATTR(available_clocksource, 0600, +static SYSDEV_ATTR(available_clocksource, 0444, sysfs_show_available_clocksources, NULL); static struct sysdev_class clocksource_sysclass = { diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 5fd9b946977..5125ddd8196 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -15,7 +15,8 @@ #include <linux/jiffies.h> #include <linux/hrtimer.h> #include <linux/capability.h> -#include <asm/div64.h> +#include <linux/math64.h> +#include <linux/clocksource.h> #include <asm/timex.h> /* @@ -23,11 +24,14 @@ */ unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ unsigned long tick_nsec; /* ACTHZ period (nsec) */ -static u64 tick_length, tick_length_base; +u64 tick_length; +static u64 tick_length_base; + +static struct hrtimer leap_timer; #define MAX_TICKADJ 500 /* microsecs */ #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ - TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ) + NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) /* * phase-lock loop variables @@ -35,11 +39,12 @@ static u64 tick_length, tick_length_base; /* TIME_ERROR prevents overwriting the CMOS clock */ static int time_state = TIME_OK; /* clock synchronization status */ int time_status = STA_UNSYNC; /* clock status bits */ -static s64 time_offset; /* time adjustment (ns) */ +static long time_tai; /* TAI offset (s) */ +static s64 time_offset; /* time adjustment (ns) */ static long time_constant = 2; /* pll time constant */ long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ -long time_freq; /* frequency offset (scaled ppm)*/ +static s64 time_freq; /* frequency offset (scaled ns/s)*/ static long time_reftime; /* time at last adjustment (s) */ long time_adjust; static long ntp_tick_adj; @@ -47,16 +52,56 @@ static long ntp_tick_adj; static void ntp_update_frequency(void) { u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) - << TICK_LENGTH_SHIFT; - second_length += (s64)ntp_tick_adj << TICK_LENGTH_SHIFT; - second_length += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); + << NTP_SCALE_SHIFT; + second_length += (s64)ntp_tick_adj << NTP_SCALE_SHIFT; + second_length += time_freq; tick_length_base = second_length; - do_div(second_length, HZ); - tick_nsec = second_length >> TICK_LENGTH_SHIFT; + tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT; + tick_length_base = div_u64(tick_length_base, NTP_INTERVAL_FREQ); +} + +static void ntp_update_offset(long offset) +{ + long mtemp; + s64 freq_adj; + + if (!(time_status & STA_PLL)) + return; - do_div(tick_length_base, NTP_INTERVAL_FREQ); + if (!(time_status & STA_NANO)) + offset *= NSEC_PER_USEC; + + /* + * Scale the phase adjustment and + * clamp to the operating range. + */ + offset = min(offset, MAXPHASE); + offset = max(offset, -MAXPHASE); + + /* + * Select how the frequency is to be controlled + * and in which mode (PLL or FLL). + */ + if (time_status & STA_FREQHOLD || time_reftime == 0) + time_reftime = xtime.tv_sec; + mtemp = xtime.tv_sec - time_reftime; + time_reftime = xtime.tv_sec; + + freq_adj = (s64)offset * mtemp; + freq_adj <<= NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant); + time_status &= ~STA_MODE; + if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { + freq_adj += div_s64((s64)offset << (NTP_SCALE_SHIFT - SHIFT_FLL), + mtemp); + time_status |= STA_MODE; + } + freq_adj += time_freq; + freq_adj = min(freq_adj, MAXFREQ_SCALED); + time_freq = max(freq_adj, -MAXFREQ_SCALED); + + time_offset = div_s64((s64)offset << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ); } /** @@ -78,62 +123,70 @@ void ntp_clear(void) } /* - * this routine handles the overflow of the microsecond field - * - * The tricky bits of code to handle the accurate clock support - * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. - * They were originally developed for SUN and DEC kernels. - * All the kudos should go to Dave for this stuff. + * Leap second processing. If in leap-insert state at the end of the + * day, the system clock is set back one second; if in leap-delete + * state, the system clock is set ahead one second. */ -void second_overflow(void) +static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) { - long time_adj; + enum hrtimer_restart res = HRTIMER_NORESTART; - /* Bump the maxerror field */ - time_maxerror += MAXFREQ >> SHIFT_USEC; - if (time_maxerror > NTP_PHASE_LIMIT) { - time_maxerror = NTP_PHASE_LIMIT; - time_status |= STA_UNSYNC; - } + write_seqlock_irq(&xtime_lock); - /* - * Leap second processing. If in leap-insert state at the end of the - * day, the system clock is set back one second; if in leap-delete - * state, the system clock is set ahead one second. The microtime() - * routine or external clock driver will insure that reported time is - * always monotonic. The ugly divides should be replaced. - */ switch (time_state) { case TIME_OK: - if (time_status & STA_INS) - time_state = TIME_INS; - else if (time_status & STA_DEL) - time_state = TIME_DEL; break; case TIME_INS: - if (xtime.tv_sec % 86400 == 0) { - xtime.tv_sec--; - wall_to_monotonic.tv_sec++; - time_state = TIME_OOP; - printk(KERN_NOTICE "Clock: inserting leap second " - "23:59:60 UTC\n"); - } + xtime.tv_sec--; + wall_to_monotonic.tv_sec++; + time_state = TIME_OOP; + printk(KERN_NOTICE "Clock: " + "inserting leap second 23:59:60 UTC\n"); + leap_timer.expires = ktime_add_ns(leap_timer.expires, + NSEC_PER_SEC); + res = HRTIMER_RESTART; break; case TIME_DEL: - if ((xtime.tv_sec + 1) % 86400 == 0) { - xtime.tv_sec++; - wall_to_monotonic.tv_sec--; - time_state = TIME_WAIT; - printk(KERN_NOTICE "Clock: deleting leap second " - "23:59:59 UTC\n"); - } + xtime.tv_sec++; + time_tai--; + wall_to_monotonic.tv_sec--; + time_state = TIME_WAIT; + printk(KERN_NOTICE "Clock: " + "deleting leap second 23:59:59 UTC\n"); break; case TIME_OOP: + time_tai++; time_state = TIME_WAIT; - break; + /* fall through */ case TIME_WAIT: if (!(time_status & (STA_INS | STA_DEL))) - time_state = TIME_OK; + time_state = TIME_OK; + break; + } + update_vsyscall(&xtime, clock); + + write_sequnlock_irq(&xtime_lock); + + return res; +} + +/* + * this routine handles the overflow of the microsecond field + * + * The tricky bits of code to handle the accurate clock support + * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. + * They were originally developed for SUN and DEC kernels. + * All the kudos should go to Dave for this stuff. + */ +void second_overflow(void) +{ + s64 time_adj; + + /* Bump the maxerror field */ + time_maxerror += MAXFREQ / NSEC_PER_USEC; + if (time_maxerror > NTP_PHASE_LIMIT) { + time_maxerror = NTP_PHASE_LIMIT; + time_status |= STA_UNSYNC; } /* @@ -143,7 +196,7 @@ void second_overflow(void) tick_length = tick_length_base; time_adj = shift_right(time_offset, SHIFT_PLL + time_constant); time_offset -= time_adj; - tick_length += (s64)time_adj << (TICK_LENGTH_SHIFT - SHIFT_UPDATE); + tick_length += time_adj; if (unlikely(time_adjust)) { if (time_adjust > MAX_TICKADJ) { @@ -154,25 +207,12 @@ void second_overflow(void) tick_length -= MAX_TICKADJ_SCALED; } else { tick_length += (s64)(time_adjust * NSEC_PER_USEC / - NTP_INTERVAL_FREQ) << TICK_LENGTH_SHIFT; + NTP_INTERVAL_FREQ) << NTP_SCALE_SHIFT; time_adjust = 0; } } } -/* - * Return how long ticks are at the moment, that is, how much time - * update_wall_time_one_tick will add to xtime next time we call it - * (assuming no calls to do_adjtimex in the meantime). - * The return value is in fixed-point nanoseconds shifted by the - * specified number of bits to the right of the binary point. - * This function has no side-effects. - */ -u64 current_tick_length(void) -{ - return tick_length; -} - #ifdef CONFIG_GENERIC_CMOS_UPDATE /* Disable the cmos update - used by virtualization and embedded */ @@ -236,8 +276,8 @@ static inline void notify_cmos_timer(void) { } */ int do_adjtimex(struct timex *txc) { - long mtemp, save_adjust, rem; - s64 freq_adj, temp64; + struct timespec ts; + long save_adjust, sec; int result; /* In order to modify anything, you gotta be super-user! */ @@ -247,147 +287,132 @@ int do_adjtimex(struct timex *txc) /* Now we validate the data before disabling interrupts */ if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) { - /* singleshot must not be used with any other mode bits */ - if (txc->modes != ADJ_OFFSET_SINGLESHOT && - txc->modes != ADJ_OFFSET_SS_READ) + /* singleshot must not be used with any other mode bits */ + if (txc->modes & ~ADJ_OFFSET_SS_READ) return -EINVAL; } - if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET)) - /* adjustment Offset limited to +- .512 seconds */ - if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE ) - return -EINVAL; - /* if the quartz is off by more than 10% something is VERY wrong ! */ if (txc->modes & ADJ_TICK) if (txc->tick < 900000/USER_HZ || txc->tick > 1100000/USER_HZ) return -EINVAL; + if (time_state != TIME_OK && txc->modes & ADJ_STATUS) + hrtimer_cancel(&leap_timer); + getnstimeofday(&ts); + write_seqlock_irq(&xtime_lock); - result = time_state; /* mostly `TIME_OK' */ /* Save for later - semantics of adjtime is to return old value */ save_adjust = time_adjust; -#if 0 /* STA_CLOCKERR is never set yet */ - time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */ -#endif /* If there are input parameters, then process them */ - if (txc->modes) - { - if (txc->modes & ADJ_STATUS) /* only set allowed bits */ - time_status = (txc->status & ~STA_RONLY) | - (time_status & STA_RONLY); - - if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */ - if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) { - result = -EINVAL; - goto leave; - } - time_freq = ((s64)txc->freq * NSEC_PER_USEC) - >> (SHIFT_USEC - SHIFT_NSEC); - } - - if (txc->modes & ADJ_MAXERROR) { - if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) { - result = -EINVAL; - goto leave; + if (txc->modes) { + if (txc->modes & ADJ_STATUS) { + if ((time_status & STA_PLL) && + !(txc->status & STA_PLL)) { + time_state = TIME_OK; + time_status = STA_UNSYNC; + } + /* only set allowed bits */ + time_status &= STA_RONLY; + time_status |= txc->status & ~STA_RONLY; + + switch (time_state) { + case TIME_OK: + start_timer: + sec = ts.tv_sec; + if (time_status & STA_INS) { + time_state = TIME_INS; + sec += 86400 - sec % 86400; + hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS); + } else if (time_status & STA_DEL) { + time_state = TIME_DEL; + sec += 86400 - (sec + 1) % 86400; + hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS); + } + break; + case TIME_INS: + case TIME_DEL: + time_state = TIME_OK; + goto start_timer; + break; + case TIME_WAIT: + if (!(time_status & (STA_INS | STA_DEL))) + time_state = TIME_OK; + break; + case TIME_OOP: + hrtimer_restart(&leap_timer); + break; + } } - time_maxerror = txc->maxerror; - } - if (txc->modes & ADJ_ESTERROR) { - if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) { - result = -EINVAL; - goto leave; + if (txc->modes & ADJ_NANO) + time_status |= STA_NANO; + if (txc->modes & ADJ_MICRO) + time_status &= ~STA_NANO; + + if (txc->modes & ADJ_FREQUENCY) { + time_freq = (s64)txc->freq * PPM_SCALE; + time_freq = min(time_freq, MAXFREQ_SCALED); + time_freq = max(time_freq, -MAXFREQ_SCALED); } - time_esterror = txc->esterror; - } - if (txc->modes & ADJ_TIMECONST) { /* p. 24 */ - if (txc->constant < 0) { /* NTP v4 uses values > 6 */ - result = -EINVAL; - goto leave; + if (txc->modes & ADJ_MAXERROR) + time_maxerror = txc->maxerror; + if (txc->modes & ADJ_ESTERROR) + time_esterror = txc->esterror; + + if (txc->modes & ADJ_TIMECONST) { + time_constant = txc->constant; + if (!(time_status & STA_NANO)) + time_constant += 4; + time_constant = min(time_constant, (long)MAXTC); + time_constant = max(time_constant, 0l); } - time_constant = min(txc->constant + 4, (long)MAXTC); - } - if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ - if (txc->modes == ADJ_OFFSET_SINGLESHOT) { - /* adjtime() is independent from ntp_adjtime() */ - time_adjust = txc->offset; + if (txc->modes & ADJ_TAI && txc->constant > 0) + time_tai = txc->constant; + + if (txc->modes & ADJ_OFFSET) { + if (txc->modes == ADJ_OFFSET_SINGLESHOT) + /* adjtime() is independent from ntp_adjtime() */ + time_adjust = txc->offset; + else + ntp_update_offset(txc->offset); } - else if (time_status & STA_PLL) { - time_offset = txc->offset * NSEC_PER_USEC; - - /* - * Scale the phase adjustment and - * clamp to the operating range. - */ - time_offset = min(time_offset, (s64)MAXPHASE * NSEC_PER_USEC); - time_offset = max(time_offset, (s64)-MAXPHASE * NSEC_PER_USEC); - - /* - * Select whether the frequency is to be controlled - * and in which mode (PLL or FLL). Clamp to the operating - * range. Ugly multiply/divide should be replaced someday. - */ - - if (time_status & STA_FREQHOLD || time_reftime == 0) - time_reftime = xtime.tv_sec; - mtemp = xtime.tv_sec - time_reftime; - time_reftime = xtime.tv_sec; - - freq_adj = time_offset * mtemp; - freq_adj = shift_right(freq_adj, time_constant * 2 + - (SHIFT_PLL + 2) * 2 - SHIFT_NSEC); - if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { - u64 utemp64; - temp64 = time_offset << (SHIFT_NSEC - SHIFT_FLL); - if (time_offset < 0) { - utemp64 = -temp64; - do_div(utemp64, mtemp); - freq_adj -= utemp64; - } else { - utemp64 = temp64; - do_div(utemp64, mtemp); - freq_adj += utemp64; - } - } - freq_adj += time_freq; - freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); - time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); - time_offset = div_long_long_rem_signed(time_offset, - NTP_INTERVAL_FREQ, - &rem); - time_offset <<= SHIFT_UPDATE; - } /* STA_PLL */ - } /* txc->modes & ADJ_OFFSET */ - if (txc->modes & ADJ_TICK) - tick_usec = txc->tick; - - if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET)) - ntp_update_frequency(); - } /* txc->modes */ -leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) + if (txc->modes & ADJ_TICK) + tick_usec = txc->tick; + + if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET)) + ntp_update_frequency(); + } + + result = time_state; /* mostly `TIME_OK' */ + if (time_status & (STA_UNSYNC|STA_CLOCKERR)) result = TIME_ERROR; if ((txc->modes == ADJ_OFFSET_SINGLESHOT) || - (txc->modes == ADJ_OFFSET_SS_READ)) + (txc->modes == ADJ_OFFSET_SS_READ)) txc->offset = save_adjust; - else - txc->offset = ((long)shift_right(time_offset, SHIFT_UPDATE)) * - NTP_INTERVAL_FREQ / 1000; - txc->freq = (time_freq / NSEC_PER_USEC) << - (SHIFT_USEC - SHIFT_NSEC); + else { + txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, + NTP_SCALE_SHIFT); + if (!(time_status & STA_NANO)) + txc->offset /= NSEC_PER_USEC; + } + txc->freq = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) * + (s64)PPM_SCALE_INV, + NTP_SCALE_SHIFT); txc->maxerror = time_maxerror; txc->esterror = time_esterror; txc->status = time_status; txc->constant = time_constant; txc->precision = 1; - txc->tolerance = MAXFREQ; + txc->tolerance = MAXFREQ_SCALED / PPM_SCALE; txc->tick = tick_usec; + txc->tai = time_tai; /* PPS is not implemented, so these are zero */ txc->ppsfreq = 0; @@ -399,9 +424,15 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) txc->errcnt = 0; txc->stbcnt = 0; write_sequnlock_irq(&xtime_lock); - do_gettimeofday(&txc->time); + + txc->time.tv_sec = ts.tv_sec; + txc->time.tv_usec = ts.tv_nsec; + if (!(time_status & STA_NANO)) + txc->time.tv_usec /= NSEC_PER_USEC; + notify_cmos_timer(); - return(result); + + return result; } static int __init ntp_tick_adj_setup(char *str) @@ -411,3 +442,10 @@ static int __init ntp_tick_adj_setup(char *str) } __setup("ntp_tick_adj=", ntp_tick_adj_setup); + +void __init ntp_init(void) +{ + ntp_clear(); + hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + leap_timer.function = ntp_leap_second; +} diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2d6087c7cf9..e91c29f961c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -53,7 +53,7 @@ void update_xtime_cache(u64 nsec) timespec_add_ns(&xtime_cache, nsec); } -static struct clocksource *clock; /* pointer to current clocksource */ +struct clocksource *clock; #ifdef CONFIG_GENERIC_TIME @@ -246,7 +246,7 @@ void __init timekeeping_init(void) write_seqlock_irqsave(&xtime_lock, flags); - ntp_clear(); + ntp_init(); clock = clocksource_get_next(); clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); @@ -371,7 +371,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * here. This is tuned so that an error of about 1 msec is adjusted * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). */ - error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ); + error2 = clock->error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); error2 = abs(error2); for (look_ahead = 0; error2 > 0; look_ahead++) error2 >>= 2; @@ -380,8 +380,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * Now calculate the error in (1 << look_ahead) ticks, but first * remove the single look ahead already included in the error. */ - tick_error = current_tick_length() >> - (TICK_LENGTH_SHIFT - clock->shift + 1); + tick_error = tick_length >> (NTP_SCALE_SHIFT - clock->shift + 1); tick_error -= clock->xtime_interval >> 1; error = ((error - tick_error) >> look_ahead) + tick_error; @@ -412,7 +411,7 @@ static void clocksource_adjust(s64 offset) s64 error, interval = clock->cycle_interval; int adj; - error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1); + error = clock->error >> (NTP_SCALE_SHIFT - clock->shift - 1); if (error > interval) { error >>= 2; if (likely(error <= interval)) @@ -434,7 +433,7 @@ static void clocksource_adjust(s64 offset) clock->xtime_interval += interval; clock->xtime_nsec -= offset; clock->error -= (interval - offset) << - (TICK_LENGTH_SHIFT - clock->shift); + (NTP_SCALE_SHIFT - clock->shift); } /** @@ -473,8 +472,8 @@ void update_wall_time(void) } /* accumulate error between NTP and clock interval */ - clock->error += current_tick_length(); - clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift); + clock->error += tick_length; + clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); } /* correct the clock when NTP error is too big */ diff --git a/kernel/timeconst.pl b/kernel/timeconst.pl index 41468035473..eb51d76e058 100644 --- a/kernel/timeconst.pl +++ b/kernel/timeconst.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl # ----------------------------------------------------------------------- # -# Copyright 2007 rPath, Inc. - All Rights Reserved +# Copyright 2007-2008 rPath, Inc. - All Rights Reserved # # This file is part of the Linux kernel, and is made available under # the terms of the GNU General Public License version 2 or (at your @@ -20,198 +20,138 @@ %canned_values = ( 24 => [ '0xa6aaaaab','0x2aaaaaa',26, - '0xa6aaaaaaaaaaaaab','0x2aaaaaaaaaaaaaa',58, 125,3, '0xc49ba5e4','0x1fbe76c8b4',37, - '0xc49ba5e353f7ceda','0x1fbe76c8b439581062',69, 3,125, '0xa2c2aaab','0xaaaa',16, - '0xa2c2aaaaaaaaaaab','0xaaaaaaaaaaaa',48, 125000,3, '0xc9539b89','0x7fffbce4217d',47, - '0xc9539b8887229e91','0x7fffbce4217d2849cb25',79, 3,125000, ], 32 => [ '0xfa000000','0x6000000',27, - '0xfa00000000000000','0x600000000000000',59, 125,4, '0x83126e98','0xfdf3b645a',36, - '0x83126e978d4fdf3c','0xfdf3b645a1cac0831',68, 4,125, '0xf4240000','0x0',17, - '0xf424000000000000','0x0',49, 31250,1, '0x8637bd06','0x3fff79c842fa',46, - '0x8637bd05af6c69b6','0x3fff79c842fa5093964a',78, 1,31250, ], 48 => [ '0xa6aaaaab','0x6aaaaaa',27, - '0xa6aaaaaaaaaaaaab','0x6aaaaaaaaaaaaaa',59, 125,6, '0xc49ba5e4','0xfdf3b645a',36, - '0xc49ba5e353f7ceda','0xfdf3b645a1cac0831',68, 6,125, '0xa2c2aaab','0x15555',17, - '0xa2c2aaaaaaaaaaab','0x1555555555555',49, 62500,3, '0xc9539b89','0x3fffbce4217d',46, - '0xc9539b8887229e91','0x3fffbce4217d2849cb25',78, 3,62500, ], 64 => [ '0xfa000000','0xe000000',28, - '0xfa00000000000000','0xe00000000000000',60, 125,8, '0x83126e98','0x7ef9db22d',35, - '0x83126e978d4fdf3c','0x7ef9db22d0e560418',67, 8,125, '0xf4240000','0x0',18, - '0xf424000000000000','0x0',50, 15625,1, '0x8637bd06','0x1fff79c842fa',45, - '0x8637bd05af6c69b6','0x1fff79c842fa5093964a',77, 1,15625, ], 100 => [ '0xa0000000','0x0',28, - '0xa000000000000000','0x0',60, 10,1, '0xcccccccd','0x733333333',35, - '0xcccccccccccccccd','0x73333333333333333',67, 1,10, '0x9c400000','0x0',18, - '0x9c40000000000000','0x0',50, 10000,1, '0xd1b71759','0x1fff2e48e8a7',45, - '0xd1b71758e219652c','0x1fff2e48e8a71de69ad4',77, 1,10000, ], 122 => [ '0x8325c53f','0xfbcda3a',28, - '0x8325c53ef368eb05','0xfbcda3ac10c9714',60, 500,61, '0xf9db22d1','0x7fbe76c8b',35, - '0xf9db22d0e560418a','0x7fbe76c8b43958106',67, 61,500, '0x8012e2a0','0x3ef36',18, - '0x8012e29f79b47583','0x3ef368eb04325',50, 500000,61, '0xffda4053','0x1ffffbce4217',45, - '0xffda4052d666a983','0x1ffffbce4217d2849cb2',77, 61,500000, ], 128 => [ '0xfa000000','0x1e000000',29, - '0xfa00000000000000','0x1e00000000000000',61, 125,16, '0x83126e98','0x3f7ced916',34, - '0x83126e978d4fdf3c','0x3f7ced916872b020c',66, 16,125, '0xf4240000','0x40000',19, - '0xf424000000000000','0x4000000000000',51, 15625,2, '0x8637bd06','0xfffbce4217d',44, - '0x8637bd05af6c69b6','0xfffbce4217d2849cb25',76, 2,15625, ], 200 => [ '0xa0000000','0x0',29, - '0xa000000000000000','0x0',61, 5,1, '0xcccccccd','0x333333333',34, - '0xcccccccccccccccd','0x33333333333333333',66, 1,5, '0x9c400000','0x0',19, - '0x9c40000000000000','0x0',51, 5000,1, '0xd1b71759','0xfff2e48e8a7',44, - '0xd1b71758e219652c','0xfff2e48e8a71de69ad4',76, 1,5000, ], 250 => [ '0x80000000','0x0',29, - '0x8000000000000000','0x0',61, 4,1, '0x80000000','0x180000000',33, - '0x8000000000000000','0x18000000000000000',65, 1,4, '0xfa000000','0x0',20, - '0xfa00000000000000','0x0',52, 4000,1, '0x83126e98','0x7ff7ced9168',43, - '0x83126e978d4fdf3c','0x7ff7ced916872b020c4',75, 1,4000, ], 256 => [ '0xfa000000','0x3e000000',30, - '0xfa00000000000000','0x3e00000000000000',62, 125,32, '0x83126e98','0x1fbe76c8b',33, - '0x83126e978d4fdf3c','0x1fbe76c8b43958106',65, 32,125, '0xf4240000','0xc0000',20, - '0xf424000000000000','0xc000000000000',52, 15625,4, '0x8637bd06','0x7ffde7210be',43, - '0x8637bd05af6c69b6','0x7ffde7210be9424e592',75, 4,15625, ], 300 => [ '0xd5555556','0x2aaaaaaa',30, - '0xd555555555555556','0x2aaaaaaaaaaaaaaa',62, 10,3, '0x9999999a','0x1cccccccc',33, - '0x999999999999999a','0x1cccccccccccccccc',65, 3,10, '0xd0555556','0xaaaaa',20, - '0xd055555555555556','0xaaaaaaaaaaaaa',52, 10000,3, '0x9d495183','0x7ffcb923a29',43, - '0x9d495182a9930be1','0x7ffcb923a29c779a6b5',75, 3,10000, ], 512 => [ '0xfa000000','0x7e000000',31, - '0xfa00000000000000','0x7e00000000000000',63, 125,64, '0x83126e98','0xfdf3b645',32, - '0x83126e978d4fdf3c','0xfdf3b645a1cac083',64, 64,125, '0xf4240000','0x1c0000',21, - '0xf424000000000000','0x1c000000000000',53, 15625,8, '0x8637bd06','0x3ffef39085f',42, - '0x8637bd05af6c69b6','0x3ffef39085f4a1272c9',74, 8,15625, ], 1000 => [ '0x80000000','0x0',31, - '0x8000000000000000','0x0',63, 1,1, '0x80000000','0x0',31, - '0x8000000000000000','0x0',63, 1,1, '0xfa000000','0x0',22, - '0xfa00000000000000','0x0',54, 1000,1, '0x83126e98','0x1ff7ced9168',41, - '0x83126e978d4fdf3c','0x1ff7ced916872b020c4',73, 1,1000, ], 1024 => [ '0xfa000000','0xfe000000',32, - '0xfa00000000000000','0xfe00000000000000',64, 125,128, '0x83126e98','0x7ef9db22',31, - '0x83126e978d4fdf3c','0x7ef9db22d0e56041',63, 128,125, '0xf4240000','0x3c0000',22, - '0xf424000000000000','0x3c000000000000',54, 15625,16, '0x8637bd06','0x1fff79c842f',41, - '0x8637bd05af6c69b6','0x1fff79c842fa5093964',73, 16,15625, ], 1200 => [ '0xd5555556','0xd5555555',32, - '0xd555555555555556','0xd555555555555555',64, 5,6, '0x9999999a','0x66666666',31, - '0x999999999999999a','0x6666666666666666',63, 6,5, '0xd0555556','0x2aaaaa',22, - '0xd055555555555556','0x2aaaaaaaaaaaaa',54, 2500,3, '0x9d495183','0x1ffcb923a29',41, - '0x9d495182a9930be1','0x1ffcb923a29c779a6b5',73, 3,2500, ] ); @@ -264,6 +204,15 @@ sub fmuls($$$) { return 0; } +# Generate a hex value if the result fits in 64 bits; +# otherwise skip. +sub bignum_hex($) { + my($x) = @_; + my $s = $x->as_hex(); + + return (length($s) > 18) ? undef : $s; +} + # Provides mul, adj, and shr factors for a specific # (bit, time, hz) combination sub muladj($$$) { @@ -271,7 +220,7 @@ sub muladj($$$) { my $s = fmuls($b, $t, $hz); my $m = fmul($s, $t, $hz); my $a = fadj($s, $t, $hz); - return ($m->as_hex(), $a->as_hex(), $s); + return (bignum_hex($m), bignum_hex($a), $s); } # Provides numerator, denominator values @@ -288,12 +237,10 @@ sub conversions($$) { # HZ_TO_xx push(@val, muladj(32, $t, $hz)); - push(@val, muladj(64, $t, $hz)); push(@val, numden($t, $hz)); # xx_TO_HZ push(@val, muladj(32, $hz, $t)); - push(@val, muladj(64, $hz, $t)); push(@val, numden($hz, $t)); return @val; @@ -318,6 +265,19 @@ sub compute_values($) { return @val; } +sub outputval($$) +{ + my($name, $val) = @_; + my $csuf; + + if (defined($val)) { + if ($name !~ /SHR/) { + $val = "U64_C($val)"; + } + printf "#define %-23s %s\n", $name.$csuf, $val.$csuf; + } +} + sub output($@) { my($hz, @val) = @_; @@ -331,6 +291,7 @@ sub output($@) print "\n"; print "#include <linux/param.h>\n"; + print "#include <linux/types.h>\n"; print "\n"; print "#if HZ != $hz\n"; @@ -340,15 +301,13 @@ sub output($@) foreach $pfx ('HZ_TO_MSEC','MSEC_TO_HZ', 'HZ_TO_USEC','USEC_TO_HZ') { - foreach $bit (32, 64) { + foreach $bit (32) { foreach $suf ('MUL', 'ADJ', 'SHR') { - printf "#define %-23s %s\n", - "${pfx}_$suf$bit", shift(@val); + outputval("${pfx}_$suf$bit", shift(@val)); } } foreach $suf ('NUM', 'DEN') { - printf "#define %-23s %s\n", - "${pfx}_$suf", shift(@val); + outputval("${pfx}_$suf", shift(@val)); } } @@ -356,6 +315,23 @@ sub output($@) print "#endif /* KERNEL_TIMECONST_H */\n"; } +# Pretty-print Perl values +sub perlvals(@) { + my $v; + my @l = (); + + foreach $v (@_) { + if (!defined($v)) { + push(@l, 'undef'); + } elsif ($v =~ /^0x/) { + push(@l, "\'".$v."\'"); + } else { + push(@l, $v.''); + } + } + return join(',', @l); +} + ($hz) = @ARGV; # Use this to generate the %canned_values structure @@ -373,15 +349,15 @@ if ($hz eq '--can') { print "$pf$hz => [\n"; while (scalar(@values)) { my $bit; - foreach $bit (32, 64) { + foreach $bit (32) { my $m = shift(@values); my $a = shift(@values); my $s = shift(@values); - print "\t\t\'",$m,"\',\'",$a,"\',",$s,",\n"; + print "\t\t", perlvals($m,$a,$s), ",\n"; } my $n = shift(@values); my $d = shift(@values); - print "\t\t",$n,',',$d,",\n"; + print "\t\t", perlvals($n,$d), ",\n"; } print "\t]"; $pf = ', '; diff --git a/kernel/timer.c b/kernel/timer.c index f3d35d4ea42..ceacc662657 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -320,14 +320,130 @@ static void timer_stats_account_timer(struct timer_list *timer) static void timer_stats_account_timer(struct timer_list *timer) {} #endif -/** - * init_timer - initialize a timer. - * @timer: the timer to be initialized - * - * init_timer() must be done to a timer prior calling *any* of the - * other timer functions. +#ifdef CONFIG_DEBUG_OBJECTS_TIMERS + +static struct debug_obj_descr timer_debug_descr; + +/* + * fixup_init is called when: + * - an active object is initialized */ -void init_timer(struct timer_list *timer) +static int timer_fixup_init(void *addr, enum debug_obj_state state) +{ + struct timer_list *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + del_timer_sync(timer); + debug_object_init(timer, &timer_debug_descr); + return 1; + default: + return 0; + } +} + +/* + * fixup_activate is called when: + * - an active object is activated + * - an unknown object is activated (might be a statically initialized object) + */ +static int timer_fixup_activate(void *addr, enum debug_obj_state state) +{ + struct timer_list *timer = addr; + + switch (state) { + + case ODEBUG_STATE_NOTAVAILABLE: + /* + * This is not really a fixup. The timer was + * statically initialized. We just make sure that it + * is tracked in the object tracker. + */ + if (timer->entry.next == NULL && + timer->entry.prev == TIMER_ENTRY_STATIC) { + debug_object_init(timer, &timer_debug_descr); + debug_object_activate(timer, &timer_debug_descr); + return 0; + } else { + WARN_ON_ONCE(1); + } + return 0; + + case ODEBUG_STATE_ACTIVE: + WARN_ON(1); + + default: + return 0; + } +} + +/* + * fixup_free is called when: + * - an active object is freed + */ +static int timer_fixup_free(void *addr, enum debug_obj_state state) +{ + struct timer_list *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + del_timer_sync(timer); + debug_object_free(timer, &timer_debug_descr); + return 1; + default: + return 0; + } +} + +static struct debug_obj_descr timer_debug_descr = { + .name = "timer_list", + .fixup_init = timer_fixup_init, + .fixup_activate = timer_fixup_activate, + .fixup_free = timer_fixup_free, +}; + +static inline void debug_timer_init(struct timer_list *timer) +{ + debug_object_init(timer, &timer_debug_descr); +} + +static inline void debug_timer_activate(struct timer_list *timer) +{ + debug_object_activate(timer, &timer_debug_descr); +} + +static inline void debug_timer_deactivate(struct timer_list *timer) +{ + debug_object_deactivate(timer, &timer_debug_descr); +} + +static inline void debug_timer_free(struct timer_list *timer) +{ + debug_object_free(timer, &timer_debug_descr); +} + +static void __init_timer(struct timer_list *timer); + +void init_timer_on_stack(struct timer_list *timer) +{ + debug_object_init_on_stack(timer, &timer_debug_descr); + __init_timer(timer); +} +EXPORT_SYMBOL_GPL(init_timer_on_stack); + +void destroy_timer_on_stack(struct timer_list *timer) +{ + debug_object_free(timer, &timer_debug_descr); +} +EXPORT_SYMBOL_GPL(destroy_timer_on_stack); + +#else +static inline void debug_timer_init(struct timer_list *timer) { } +static inline void debug_timer_activate(struct timer_list *timer) { } +static inline void debug_timer_deactivate(struct timer_list *timer) { } +#endif + +static void __init_timer(struct timer_list *timer) { timer->entry.next = NULL; timer->base = __raw_get_cpu_var(tvec_bases); @@ -337,6 +453,19 @@ void init_timer(struct timer_list *timer) memset(timer->start_comm, 0, TASK_COMM_LEN); #endif } + +/** + * init_timer - initialize a timer. + * @timer: the timer to be initialized + * + * init_timer() must be done to a timer prior calling *any* of the + * other timer functions. + */ +void init_timer(struct timer_list *timer) +{ + debug_timer_init(timer); + __init_timer(timer); +} EXPORT_SYMBOL(init_timer); void init_timer_deferrable(struct timer_list *timer) @@ -351,6 +480,8 @@ static inline void detach_timer(struct timer_list *timer, { struct list_head *entry = &timer->entry; + debug_timer_deactivate(timer); + __list_del(entry->prev, entry->next); if (clear_pending) entry->next = NULL; @@ -405,6 +536,8 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) ret = 1; } + debug_timer_activate(timer); + new_base = __get_cpu_var(tvec_bases); if (base != new_base) { @@ -450,6 +583,7 @@ void add_timer_on(struct timer_list *timer, int cpu) BUG_ON(timer_pending(timer) || !timer->function); spin_lock_irqsave(&base->lock, flags); timer_set_base(timer, base); + debug_timer_activate(timer); internal_add_timer(base, timer); /* * Check whether the other CPU is idle and needs to be @@ -1086,11 +1220,14 @@ signed long __sched schedule_timeout(signed long timeout) expire = timeout + jiffies; - setup_timer(&timer, process_timeout, (unsigned long)current); + setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); __mod_timer(&timer, expire); schedule(); del_singleshot_timer_sync(&timer); + /* Remove the timer from the object tracker */ + destroy_timer_on_stack(&timer); + timeout = expire - jiffies; out: diff --git a/kernel/user.c b/kernel/user.c index aefbbfa3159..865ecf57a09 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -384,7 +384,7 @@ void free_uid(struct user_struct *up) local_irq_restore(flags); } -struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) +struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) { struct hlist_head *hashent = uidhashentry(ns, uid); struct user_struct *up, *new; @@ -399,26 +399,12 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) spin_unlock_irq(&uidhash_lock); if (!up) { - new = kmem_cache_alloc(uid_cachep, GFP_KERNEL); + new = kmem_cache_zalloc(uid_cachep, GFP_KERNEL); if (!new) goto out_unlock; new->uid = uid; atomic_set(&new->__count, 1); - atomic_set(&new->processes, 0); - atomic_set(&new->files, 0); - atomic_set(&new->sigpending, 0); -#ifdef CONFIG_INOTIFY_USER - atomic_set(&new->inotify_watches, 0); - atomic_set(&new->inotify_devs, 0); -#endif -#ifdef CONFIG_POSIX_MQUEUE - new->mq_bytes = 0; -#endif - new->locked_shm = 0; -#ifdef CONFIG_KEYS - new->uid_keyring = new->session_keyring = NULL; -#endif if (sched_create_user(new) < 0) goto out_free_user; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7db251a959c..29fc39f1029 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -195,7 +195,6 @@ static void delayed_work_timer_fn(unsigned long __data) int queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { - timer_stats_timer_set_start_info(&dwork->timer); if (delay == 0) return queue_work(wq, &dwork->work); @@ -219,11 +218,12 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; - timer_stats_timer_set_start_info(&dwork->timer); if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); + timer_stats_timer_set_start_info(&dwork->timer); + /* This stores cwq for the moment, for the timer_fn */ set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id())); timer->expires = jiffies + delay; @@ -247,7 +247,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) if (cwq->run_depth > 3) { /* morton gets to eat his hat */ printk("%s: recursion depth exceeded: %d\n", - __FUNCTION__, cwq->run_depth); + __func__, cwq->run_depth); dump_stack(); } while (!list_empty(&cwq->worklist)) { @@ -564,7 +564,6 @@ EXPORT_SYMBOL(schedule_work); int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) { - timer_stats_timer_set_start_info(&dwork->timer); return queue_delayed_work(keventd_wq, dwork, delay); } EXPORT_SYMBOL(schedule_delayed_work); @@ -581,7 +580,6 @@ EXPORT_SYMBOL(schedule_delayed_work); int schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { - timer_stats_timer_set_start_info(&dwork->timer); return queue_delayed_work_on(cpu, keventd_wq, dwork, delay); } EXPORT_SYMBOL(schedule_delayed_work_on); |