diff options
Diffstat (limited to 'kernel')
35 files changed, 924 insertions, 399 deletions
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 9c8c23227c7..87865f8b4ce 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1601,8 +1601,8 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb, int audit_filter_user(struct netlink_skb_parms *cb, int type) { + enum audit_state state = AUDIT_DISABLED; struct audit_entry *e; - enum audit_state state; int ret = 1; rcu_read_lock(); diff --git a/kernel/capability.c b/kernel/capability.c index edb845a6e84..c8d3c776203 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -92,15 +92,17 @@ out: * cap_set_pg - set capabilities for all processes in a given process * group. We call this holding task_capability_lock and tasklist_lock. */ -static inline int cap_set_pg(int pgrp, kernel_cap_t *effective, +static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { struct task_struct *g, *target; int ret = -EPERM; int found = 0; + struct pid *pgrp; - do_each_task_pid(pgrp, PIDTYPE_PGID, g) { + pgrp = find_pid(pgrp_nr); + do_each_pid_task(pgrp, PIDTYPE_PGID, g) { target = g; while_each_thread(g, target) { if (!security_capset_check(target, effective, @@ -113,7 +115,7 @@ static inline int cap_set_pg(int pgrp, kernel_cap_t *effective, } found = 1; } - } while_each_task_pid(pgrp, PIDTYPE_PGID, g); + } while_each_pid_task(pgrp, PIDTYPE_PGID, g); if (!found) ret = 0; diff --git a/kernel/compat.c b/kernel/compat.c index 6952dd05730..cebb4c28c03 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -1016,3 +1016,69 @@ asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, return sys_migrate_pages(pid, nr_bits + 1, old, new); } #endif + +struct compat_sysinfo { + s32 uptime; + u32 loads[3]; + u32 totalram; + u32 freeram; + u32 sharedram; + u32 bufferram; + u32 totalswap; + u32 freeswap; + u16 procs; + u16 pad; + u32 totalhigh; + u32 freehigh; + u32 mem_unit; + char _f[20-2*sizeof(u32)-sizeof(int)]; +}; + +asmlinkage long +compat_sys_sysinfo(struct compat_sysinfo __user *info) +{ + struct sysinfo s; + + do_sysinfo(&s); + + /* Check to see if any memory value is too large for 32-bit and scale + * down if needed + */ + if ((s.totalram >> 32) || (s.totalswap >> 32)) { + int bitcount = 0; + + while (s.mem_unit < PAGE_SIZE) { + s.mem_unit <<= 1; + bitcount++; + } + + s.totalram >>= bitcount; + s.freeram >>= bitcount; + s.sharedram >>= bitcount; + s.bufferram >>= bitcount; + s.totalswap >>= bitcount; + s.freeswap >>= bitcount; + s.totalhigh >>= bitcount; + s.freehigh >>= bitcount; + } + + if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || + __put_user (s.uptime, &info->uptime) || + __put_user (s.loads[0], &info->loads[0]) || + __put_user (s.loads[1], &info->loads[1]) || + __put_user (s.loads[2], &info->loads[2]) || + __put_user (s.totalram, &info->totalram) || + __put_user (s.freeram, &info->freeram) || + __put_user (s.sharedram, &info->sharedram) || + __put_user (s.bufferram, &info->bufferram) || + __put_user (s.totalswap, &info->totalswap) || + __put_user (s.freeswap, &info->freeswap) || + __put_user (s.procs, &info->procs) || + __put_user (s.totalhigh, &info->totalhigh) || + __put_user (s.freehigh, &info->freehigh) || + __put_user (s.mem_unit, &info->mem_unit)) + return -EFAULT; + + return 0; +} + diff --git a/kernel/cpu.c b/kernel/cpu.c index 7406fe6966f..3d4206ada5c 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -309,6 +309,8 @@ void enable_nonboot_cpus(void) mutex_lock(&cpu_add_remove_lock); cpu_hotplug_disabled = 0; mutex_unlock(&cpu_add_remove_lock); + if (cpus_empty(frozen_cpus)) + return; printk("Enabling non-boot CPUs ...\n"); for_each_cpu_mask(cpu, frozen_cpus) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 6b05dc69c95..f382b0f775e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1540,7 +1540,7 @@ static const struct file_operations cpuset_file_operations = { .release = cpuset_file_release, }; -static struct inode_operations cpuset_dir_inode_operations = { +static const struct inode_operations cpuset_dir_inode_operations = { .lookup = simple_lookup, .mkdir = cpuset_mkdir, .rmdir = cpuset_rmdir, @@ -2656,7 +2656,7 @@ static int cpuset_open(struct inode *inode, struct file *file) return single_open(file, proc_cpuset_show, pid); } -struct file_operations proc_cpuset_operations = { +const struct file_operations proc_cpuset_operations = { .open = cpuset_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/exit.c b/kernel/exit.c index fec12eb1247..f132349c032 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -185,21 +185,19 @@ repeat: * This checks not only the pgrp, but falls back on the pid if no * satisfactory pgrp is found. I dunno - gdb doesn't work correctly * without this... + * + * The caller must hold rcu lock or the tasklist lock. */ -int session_of_pgrp(int pgrp) +struct pid *session_of_pgrp(struct pid *pgrp) { struct task_struct *p; - int sid = 0; - - read_lock(&tasklist_lock); + struct pid *sid = NULL; - p = find_task_by_pid_type(PIDTYPE_PGID, pgrp); + p = pid_task(pgrp, PIDTYPE_PGID); if (p == NULL) - p = find_task_by_pid(pgrp); + p = pid_task(pgrp, PIDTYPE_PID); if (p != NULL) - sid = process_session(p); - - read_unlock(&tasklist_lock); + sid = task_session(p); return sid; } @@ -212,53 +210,52 @@ int session_of_pgrp(int pgrp) * * "I ask you, have you ever known what it is to be an orphan?" */ -static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task) +static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task) { struct task_struct *p; int ret = 1; - do_each_task_pid(pgrp, PIDTYPE_PGID, p) { + do_each_pid_task(pgrp, PIDTYPE_PGID, p) { if (p == ignored_task || p->exit_state || is_init(p->real_parent)) continue; - if (process_group(p->real_parent) != pgrp && - process_session(p->real_parent) == process_session(p)) { + if (task_pgrp(p->real_parent) != pgrp && + task_session(p->real_parent) == task_session(p)) { ret = 0; break; } - } while_each_task_pid(pgrp, PIDTYPE_PGID, p); + } while_each_pid_task(pgrp, PIDTYPE_PGID, p); return ret; /* (sighing) "Often!" */ } -int is_orphaned_pgrp(int pgrp) +int is_current_pgrp_orphaned(void) { int retval; read_lock(&tasklist_lock); - retval = will_become_orphaned_pgrp(pgrp, NULL); + retval = will_become_orphaned_pgrp(task_pgrp(current), NULL); read_unlock(&tasklist_lock); return retval; } -static int has_stopped_jobs(int pgrp) +static int has_stopped_jobs(struct pid *pgrp) { int retval = 0; struct task_struct *p; - do_each_task_pid(pgrp, PIDTYPE_PGID, p) { + do_each_pid_task(pgrp, PIDTYPE_PGID, p) { if (p->state != TASK_STOPPED) continue; retval = 1; break; - } while_each_task_pid(pgrp, PIDTYPE_PGID, p); + } while_each_pid_task(pgrp, PIDTYPE_PGID, p); return retval; } /** - * reparent_to_init - Reparent the calling kernel thread to the init task - * of the pid space that the thread belongs to. + * reparent_to_init - Reparent the calling kernel thread to the init task of the pid space that the thread belongs to. * * If a kernel thread is launched as a result of a system call, or if * it ever exits, it should generally reparent itself to init so that @@ -431,8 +428,10 @@ static void close_files(struct files_struct * files) while (set) { if (set & 1) { struct file * file = xchg(&fdt->fd[i], NULL); - if (file) + if (file) { filp_close(file, files); + cond_resched(); + } } i++; set >>= 1; @@ -649,14 +648,14 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) * than we are, and it was the only connection * outside, so the child pgrp is now orphaned. */ - if ((process_group(p) != process_group(father)) && - (process_session(p) == process_session(father))) { - int pgrp = process_group(p); + if ((task_pgrp(p) != task_pgrp(father)) && + (task_session(p) == task_session(father))) { + struct pid *pgrp = task_pgrp(p); if (will_become_orphaned_pgrp(pgrp, NULL) && has_stopped_jobs(pgrp)) { - __kill_pg_info(SIGHUP, SEND_SIG_PRIV, pgrp); - __kill_pg_info(SIGCONT, SEND_SIG_PRIV, pgrp); + __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); + __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); } } } @@ -736,6 +735,7 @@ static void exit_notify(struct task_struct *tsk) int state; struct task_struct *t; struct list_head ptrace_dead, *_p, *_n; + struct pid *pgrp; if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT) && !thread_group_empty(tsk)) { @@ -788,12 +788,13 @@ static void exit_notify(struct task_struct *tsk) t = tsk->real_parent; - if ((process_group(t) != process_group(tsk)) && - (process_session(t) == process_session(tsk)) && - will_become_orphaned_pgrp(process_group(tsk), tsk) && - has_stopped_jobs(process_group(tsk))) { - __kill_pg_info(SIGHUP, SEND_SIG_PRIV, process_group(tsk)); - __kill_pg_info(SIGCONT, SEND_SIG_PRIV, process_group(tsk)); + pgrp = task_pgrp(tsk); + if ((task_pgrp(t) != pgrp) && + (task_session(t) != task_session(tsk)) && + will_become_orphaned_pgrp(pgrp, tsk) && + has_stopped_jobs(pgrp)) { + __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); + __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); } /* Let father know we died diff --git a/kernel/fork.c b/kernel/fork.c index d57118da73f..0b6293d94d9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -869,7 +869,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts sig->it_prof_incr = cputime_zero; sig->leader = 0; /* session leadership doesn't inherit */ - sig->tty_old_pgrp = 0; + sig->tty_old_pgrp = NULL; sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; @@ -1038,10 +1038,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->utime = cputime_zero; p->stime = cputime_zero; p->sched_time = 0; +#ifdef CONFIG_TASK_XACCT p->rchar = 0; /* I/O counter: bytes read */ p->wchar = 0; /* I/O counter: bytes written */ p->syscr = 0; /* I/O counter: read syscalls */ p->syscw = 0; /* I/O counter: write syscalls */ +#endif task_io_accounting_init(p); acct_clear_integrals(p); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d0ba190dfeb..f44e499e8fc 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -102,7 +102,7 @@ static DEFINE_PER_CPU(struct hrtimer_base, hrtimer_bases[MAX_HRTIMER_BASES]) = * * The function calculates the monotonic clock from the realtime * clock and the wall_to_monotonic offset and stores the result - * in normalized timespec format in the variable pointed to by ts. + * in normalized timespec format in the variable pointed to by @ts. */ void ktime_get_ts(struct timespec *ts) { @@ -583,8 +583,8 @@ EXPORT_SYMBOL_GPL(hrtimer_init); * @which_clock: which clock to query * @tp: pointer to timespec variable to store the resolution * - * Store the resolution of the clock selected by which_clock in the - * variable pointed to by tp. + * Store the resolution of the clock selected by @which_clock in the + * variable pointed to by @tp. */ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) { diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 1dab0ac3f79..681c52dbfe2 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -1,5 +1,5 @@ -obj-y := handle.o manage.o spurious.o resend.o chip.o +obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c new file mode 100644 index 00000000000..85a430da0fb --- /dev/null +++ b/kernel/irq/devres.c @@ -0,0 +1,88 @@ +#include <linux/module.h> +#include <linux/interrupt.h> + +/* + * Device resource management aware IRQ request/free implementation. + */ +struct irq_devres { + unsigned int irq; + void *dev_id; +}; + +static void devm_irq_release(struct device *dev, void *res) +{ + struct irq_devres *this = res; + + free_irq(this->irq, this->dev_id); +} + +static int devm_irq_match(struct device *dev, void *res, void *data) +{ + struct irq_devres *this = res, *match = data; + + return this->irq == match->irq && this->dev_id == match->dev_id; +} + +/** + * devm_request_irq - allocate an interrupt line for a managed device + * @dev: device to request interrupt for + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * Except for the extra @dev argument, this function takes the + * same arguments and performs the same function as + * request_irq(). IRQs requested with this function will be + * automatically freed on driver detach. + * + * If an IRQ allocated with this function needs to be freed + * separately, dev_free_irq() must be used. + */ +int devm_request_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id) +{ + struct irq_devres *dr; + int rc; + + dr = devres_alloc(devm_irq_release, sizeof(struct irq_devres), + GFP_KERNEL); + if (!dr) + return -ENOMEM; + + rc = request_irq(irq, handler, irqflags, devname, dev_id); + if (rc) { + kfree(dr); + return rc; + } + + dr->irq = irq; + dr->dev_id = dev_id; + devres_add(dev, dr); + + return 0; +} +EXPORT_SYMBOL(devm_request_irq); + +/** + * devm_free_irq - free an interrupt + * @dev: device to free interrupt for + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * Except for the extra @dev argument, this function takes the + * same arguments and performs the same function as free_irq(). + * This function instead of free_irq() should be used to manually + * free IRQs allocated with dev_request_irq(). + */ +void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id) +{ + struct irq_devres match_data = { irq, dev_id }; + + free_irq(irq, dev_id); + WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match, + &match_data)); +} +EXPORT_SYMBOL(devm_free_irq); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 8b961adc3bd..7c85d69188e 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -328,12 +328,14 @@ int setup_irq(unsigned int irq, struct irqaction *new) return 0; mismatch: +#ifdef CONFIG_DEBUG_SHIRQ if (!(new->flags & IRQF_PROBE_SHARED)) { printk(KERN_ERR "IRQ handler type mismatch for IRQ %d\n", irq); if (old_name) printk(KERN_ERR "current handler: %s\n", old_name); dump_stack(); } +#endif spin_unlock_irqrestore(&desc->lock, flags); return -EBUSY; } @@ -357,6 +359,7 @@ void free_irq(unsigned int irq, void *dev_id) struct irq_desc *desc; struct irqaction **p; unsigned long flags; + irqreturn_t (*handler)(int, void *) = NULL; WARN_ON(in_interrupt()); if (irq >= NR_IRQS) @@ -396,6 +399,8 @@ void free_irq(unsigned int irq, void *dev_id) /* Make sure it's not being used on another CPU */ synchronize_irq(irq); + if (action->flags & IRQF_SHARED) + handler = action->handler; kfree(action); return; } @@ -403,6 +408,17 @@ void free_irq(unsigned int irq, void *dev_id) spin_unlock_irqrestore(&desc->lock, flags); return; } +#ifdef CONFIG_DEBUG_SHIRQ + if (handler) { + /* + * It's a shared IRQ -- the driver ought to be prepared for it + * to happen even now it's being freed, so let's make sure.... + * We do this after actually deregistering it, to make sure that + * a 'real' IRQ doesn't run in parallel with our fake + */ + handler(irq, dev_id); + } +#endif } EXPORT_SYMBOL(free_irq); @@ -475,6 +491,25 @@ int request_irq(unsigned int irq, irq_handler_t handler, select_smp_affinity(irq); +#ifdef CONFIG_DEBUG_SHIRQ + if (irqflags & IRQF_SHARED) { + /* + * It's a shared IRQ -- the driver ought to be prepared for it + * to happen immediately, so let's make sure.... + * We do this before actually registering it, to make sure that + * a 'real' IRQ doesn't run in parallel with our fake + */ + if (irqflags & IRQF_DISABLED) { + unsigned long flags; + + local_irq_save(flags); + handler(irq, dev_id); + local_irq_restore(flags); + } else + handler(irq, dev_id); + } +#endif + retval = setup_irq(irq, action); if (retval) kfree(action); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 61f5c717a8f..6d3be06e8ce 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -136,7 +136,6 @@ void register_irq_proc(unsigned int irq) entry = create_proc_entry("smp_affinity", 0600, irq_desc[irq].dir); if (entry) { - entry->nlink = 1; entry->data = (void *)(long)irq; entry->read_proc = irq_affinity_read_proc; entry->write_proc = irq_affinity_write_proc; diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 5d1d907378a..cee419143fd 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -32,8 +32,8 @@ * @gfp_mask: get_free_pages mask, passed to kmalloc() * @lock: the lock to be used to protect the fifo buffer * - * Do NOT pass the kfifo to kfifo_free() after use ! Simply free the - * struct kfifo with kfree(). + * Do NOT pass the kfifo to kfifo_free() after use! Simply free the + * &struct kfifo with kfree(). */ struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, gfp_t gfp_mask, spinlock_t *lock) @@ -108,7 +108,7 @@ EXPORT_SYMBOL(kfifo_free); * @buffer: the data to be added. * @len: the length of the data to be added. * - * This function copies at most 'len' bytes from the 'buffer' into + * This function copies at most @len bytes from the @buffer into * the FIFO depending on the free space, and returns the number of * bytes copied. * @@ -155,8 +155,8 @@ EXPORT_SYMBOL(__kfifo_put); * @buffer: where the data must be copied. * @len: the size of the destination buffer. * - * This function copies at most 'len' bytes from the FIFO into the - * 'buffer' and returns the number of copied bytes. + * This function copies at most @len bytes from the FIFO into the + * @buffer and returns the number of copied bytes. * * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. diff --git a/kernel/kthread.c b/kernel/kthread.c index 1db8c72d0d3..87c50ccd1d4 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -50,7 +50,7 @@ static struct kthread_stop_info kthread_stop_info; /** * kthread_should_stop - should this kthread return now? * - * When someone calls kthread_stop on your kthread, it will be woken + * When someone calls kthread_stop() on your kthread, it will be woken * and this will return true. You should then return, and your return * value will be passed through to kthread_stop(). */ @@ -143,7 +143,7 @@ static void keventd_create_kthread(struct work_struct *work) * it. See also kthread_run(), kthread_create_on_cpu(). * * When woken, the thread will run @threadfn() with @data as its - * argument. @threadfn can either call do_exit() directly if it is a + * argument. @threadfn() can either call do_exit() directly if it is a * standalone thread for which noone will call kthread_stop(), or * return when 'kthread_should_stop()' is true (which means * kthread_stop() has been called). The return value should be zero @@ -192,7 +192,7 @@ EXPORT_SYMBOL(kthread_create); * * Description: This function is equivalent to set_cpus_allowed(), * except that @cpu doesn't need to be online, and the thread must be - * stopped (i.e., just returned from kthread_create(). + * stopped (i.e., just returned from kthread_create()). */ void kthread_bind(struct task_struct *k, unsigned int cpu) { diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 509efd49540..592c576d77a 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -70,6 +70,9 @@ static int graph_lock(void) static inline int graph_unlock(void) { + if (debug_locks && !__raw_spin_is_locked(&lockdep_lock)) + return DEBUG_LOCKS_WARN_ON(1); + __raw_spin_unlock(&lockdep_lock); return 0; } @@ -487,7 +490,7 @@ static void print_lock_dependencies(struct lock_class *class, int depth) * Add a new dependency to the head of the list: */ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, - struct list_head *head, unsigned long ip) + struct list_head *head, unsigned long ip, int distance) { struct lock_list *entry; /* @@ -499,6 +502,7 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, return 0; entry->class = this; + entry->distance = distance; if (!save_trace(&entry->trace)) return 0; @@ -712,6 +716,9 @@ find_usage_backwards(struct lock_class *source, unsigned int depth) struct lock_list *entry; int ret; + if (!__raw_spin_is_locked(&lockdep_lock)) + return DEBUG_LOCKS_WARN_ON(1); + if (depth > max_recursion_depth) max_recursion_depth = depth; if (depth >= RECURSION_LIMIT) @@ -900,7 +907,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, */ static int check_prev_add(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next) + struct held_lock *next, int distance) { struct lock_list *entry; int ret; @@ -978,8 +985,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, * L2 added to its dependency list, due to the first chain.) */ list_for_each_entry(entry, &prev->class->locks_after, entry) { - if (entry->class == next->class) + if (entry->class == next->class) { + if (distance == 1) + entry->distance = 1; return 2; + } } /* @@ -987,12 +997,13 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, * to the previous lock's dependency list: */ ret = add_lock_to_list(prev->class, next->class, - &prev->class->locks_after, next->acquire_ip); + &prev->class->locks_after, next->acquire_ip, distance); + if (!ret) return 0; ret = add_lock_to_list(next->class, prev->class, - &next->class->locks_before, next->acquire_ip); + &next->class->locks_before, next->acquire_ip, distance); if (!ret) return 0; @@ -1040,13 +1051,14 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) goto out_bug; for (;;) { + int distance = curr->lockdep_depth - depth + 1; hlock = curr->held_locks + depth-1; /* * Only non-recursive-read entries get new dependencies * added: */ if (hlock->read != 2) { - if (!check_prev_add(curr, hlock, next)) + if (!check_prev_add(curr, hlock, next, distance)) return 0; /* * Stop after the first non-trylock entry, @@ -1293,7 +1305,8 @@ out_unlock_set: if (!subclass || force) lock->class_cache = class; - DEBUG_LOCKS_WARN_ON(class->subclass != subclass); + if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) + return NULL; return class; } @@ -1308,7 +1321,8 @@ static inline int lookup_chain_cache(u64 chain_key, struct lock_class *class) struct list_head *hash_head = chainhashentry(chain_key); struct lock_chain *chain; - DEBUG_LOCKS_WARN_ON(!irqs_disabled()); + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return 0; /* * We can walk it lock-free, because entries only get added * to the hash: @@ -1394,7 +1408,9 @@ static void check_chain_key(struct task_struct *curr) return; } id = hlock->class - lock_classes; - DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS); + if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) + return; + if (prev_hlock && (prev_hlock->irq_context != hlock->irq_context)) chain_key = 0; @@ -2205,7 +2221,11 @@ out_calc_hash: if (!check_prevs_add(curr, hlock)) return 0; graph_unlock(); - } + } else + /* after lookup_chain_cache(): */ + if (unlikely(!debug_locks)) + return 0; + curr->lockdep_depth++; check_chain_key(curr); if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { @@ -2214,6 +2234,7 @@ out_calc_hash: printk("turning off the locking correctness validator.\n"); return 0; } + if (unlikely(curr->lockdep_depth > max_lockdep_depth)) max_lockdep_depth = curr->lockdep_depth; @@ -2764,4 +2785,3 @@ void debug_show_held_locks(struct task_struct *task) } EXPORT_SYMBOL_GPL(debug_show_held_locks); - diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index b554b40a4aa..88fc611b3ae 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -77,12 +77,29 @@ static unsigned long count_backward_deps(struct lock_class *class) return ret; } +static void print_name(struct seq_file *m, struct lock_class *class) +{ + char str[128]; + const char *name = class->name; + + if (!name) { + name = __get_key_name(class->key, str); + seq_printf(m, "%s", name); + } else{ + seq_printf(m, "%s", name); + if (class->name_version > 1) + seq_printf(m, "#%d", class->name_version); + if (class->subclass) + seq_printf(m, "/%d", class->subclass); + } +} + static int l_show(struct seq_file *m, void *v) { unsigned long nr_forward_deps, nr_backward_deps; struct lock_class *class = m->private; - char str[128], c1, c2, c3, c4; - const char *name; + struct lock_list *entry; + char c1, c2, c3, c4; seq_printf(m, "%p", class->key); #ifdef CONFIG_DEBUG_LOCKDEP @@ -97,16 +114,16 @@ static int l_show(struct seq_file *m, void *v) get_usage_chars(class, &c1, &c2, &c3, &c4); seq_printf(m, " %c%c%c%c", c1, c2, c3, c4); - name = class->name; - if (!name) { - name = __get_key_name(class->key, str); - seq_printf(m, ": %s", name); - } else{ - seq_printf(m, ": %s", name); - if (class->name_version > 1) - seq_printf(m, "#%d", class->name_version); - if (class->subclass) - seq_printf(m, "/%d", class->subclass); + seq_printf(m, ": "); + print_name(m, class); + seq_puts(m, "\n"); + + list_for_each_entry(entry, &class->locks_after, entry) { + if (entry->distance == 1) { + seq_printf(m, " -> [%p] ", entry->class); + print_name(m, entry->class); + seq_puts(m, "\n"); + } } seq_puts(m, "\n"); @@ -227,7 +244,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v) sum_forward_deps += count_forward_deps(class); } -#ifdef CONFIG_LOCKDEP_DEBUG +#ifdef CONFIG_DEBUG_LOCKDEP DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused); #endif seq_printf(m, " lock-classes: %11lu [max: %lu]\n", diff --git a/kernel/panic.c b/kernel/panic.c index 525e365f723..623d1828259 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -150,6 +150,7 @@ EXPORT_SYMBOL(panic); * 'R' - User forced a module unload. * 'M' - Machine had a machine check experience. * 'B' - System has hit bad_page. + * 'U' - Userspace-defined naughtiness. * * The string is overwritten by the next call to print_taint(). */ @@ -158,13 +159,14 @@ const char *print_tainted(void) { static char buf[20]; if (tainted) { - snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c", + snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c", tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', tainted & TAINT_FORCED_MODULE ? 'F' : ' ', tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', tainted & TAINT_FORCED_RMMOD ? 'R' : ' ', tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', - tainted & TAINT_BAD_PAGE ? 'B' : ' '); + tainted & TAINT_BAD_PAGE ? 'B' : ' ', + tainted & TAINT_USER ? 'U' : ' '); } else snprintf(buf, sizeof(buf), "Not tainted"); diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 5fe87de10ff..a1bf6161783 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -399,10 +399,9 @@ EXPORT_SYMBOL_GPL(register_posix_clock); static struct k_itimer * alloc_posix_timer(void) { struct k_itimer *tmr; - tmr = kmem_cache_alloc(posix_timers_cache, GFP_KERNEL); + tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL); if (!tmr) return tmr; - memset(tmr, 0, sizeof (struct k_itimer)); if (unlikely(!(tmr->sigq = sigqueue_alloc()))) { kmem_cache_free(posix_timers_cache, tmr); tmr = NULL; diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index ed296225dcd..95f6657fff7 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -131,3 +131,29 @@ config SUSPEND_SMP bool depends on HOTPLUG_CPU && X86 && PM default y + +config APM_EMULATION + tristate "Advanced Power Management Emulation" + depends on PM && SYS_SUPPORTS_APM_EMULATION + help + APM is a BIOS specification for saving power using several different + techniques. This is mostly useful for battery powered laptops with + APM compliant BIOSes. If you say Y here, the system time will be + reset after a RESUME operation, the /proc/apm device will provide + battery status information, and user-space programs will receive + notification of APM "events" (e.g. battery status change). + + In order to use APM, you will need supporting software. For location + and more information, read <file:Documentation/pm.txt> and the + Battery Powered Linux mini-HOWTO, available from + <http://www.tldp.org/docs.html#howto>. + + This driver does not spin down disk drives (see the hdparm(8) + manpage ("man 8 hdparm") for that), and it doesn't turn off + VESA-compliant "green" monitors. + + Generally, if you don't have a battery in your machine, there isn't + much point in using this driver and you should say N. If you get + random kernel OOPSes or reboots that don't seem to be related to + anything, try disabling/enabling this option (or disabling/enabling + APM in your BIOS). diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 88fc5d7ac73..406b20adb27 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -87,52 +87,24 @@ static inline void platform_finish(void) } } +static void unprepare_processes(void) +{ + thaw_processes(); + pm_restore_console(); +} + static int prepare_processes(void) { int error = 0; pm_prepare_console(); - - error = disable_nonboot_cpus(); - if (error) - goto enable_cpus; - if (freeze_processes()) { error = -EBUSY; - goto thaw; + unprepare_processes(); } - - if (pm_disk_mode == PM_DISK_TESTPROC) { - printk("swsusp debug: Waiting for 5 seconds.\n"); - mdelay(5000); - goto thaw; - } - - error = platform_prepare(); - if (error) - goto thaw; - - /* Free memory before shutting down devices. */ - if (!(error = swsusp_shrink_memory())) - return 0; - - platform_finish(); - thaw: - thaw_processes(); - enable_cpus: - enable_nonboot_cpus(); - pm_restore_console(); return error; } -static void unprepare_processes(void) -{ - platform_finish(); - thaw_processes(); - enable_nonboot_cpus(); - pm_restore_console(); -} - /** * pm_suspend_disk - The granpappy of hibernation power management. * @@ -150,29 +122,45 @@ int pm_suspend_disk(void) if (error) return error; - if (pm_disk_mode == PM_DISK_TESTPROC) - return 0; + if (pm_disk_mode == PM_DISK_TESTPROC) { + printk("swsusp debug: Waiting for 5 seconds.\n"); + mdelay(5000); + goto Thaw; + } + /* Free memory before shutting down devices. */ + error = swsusp_shrink_memory(); + if (error) + goto Thaw; + + error = platform_prepare(); + if (error) + goto Thaw; suspend_console(); error = device_suspend(PMSG_FREEZE); if (error) { - resume_console(); - printk("Some devices failed to suspend\n"); - goto Thaw; + printk(KERN_ERR "PM: Some devices failed to suspend\n"); + goto Resume_devices; } + error = disable_nonboot_cpus(); + if (error) + goto Enable_cpus; if (pm_disk_mode == PM_DISK_TEST) { printk("swsusp debug: Waiting for 5 seconds.\n"); mdelay(5000); - goto Done; + goto Enable_cpus; } pr_debug("PM: snapshotting memory.\n"); in_suspend = 1; - if ((error = swsusp_suspend())) - goto Done; + error = swsusp_suspend(); + if (error) + goto Enable_cpus; if (in_suspend) { + enable_nonboot_cpus(); + platform_finish(); device_resume(); resume_console(); pr_debug("PM: writing image.\n"); @@ -188,7 +176,10 @@ int pm_suspend_disk(void) } swsusp_free(); - Done: + Enable_cpus: + enable_nonboot_cpus(); + Resume_devices: + platform_finish(); device_resume(); resume_console(); Thaw: @@ -237,19 +228,28 @@ static int software_resume(void) pr_debug("PM: Checking swsusp image.\n"); - if ((error = swsusp_check())) + error = swsusp_check(); + if (error) goto Done; pr_debug("PM: Preparing processes for restore.\n"); - if ((error = prepare_processes())) { + error = prepare_processes(); + if (error) { swsusp_close(); goto Done; } + error = platform_prepare(); + if (error) { + swsusp_free(); + goto Thaw; + } + pr_debug("PM: Reading swsusp image.\n"); - if ((error = swsusp_read())) { + error = swsusp_read(); + if (error) { swsusp_free(); goto Thaw; } @@ -257,21 +257,22 @@ static int software_resume(void) pr_debug("PM: Preparing devices for restore.\n"); suspend_console(); - if ((error = device_suspend(PMSG_PRETHAW))) { - resume_console(); - printk("Some devices failed to suspend\n"); - swsusp_free(); - goto Thaw; - } + error = device_suspend(PMSG_PRETHAW); + if (error) + goto Free; - mb(); + error = disable_nonboot_cpus(); + if (!error) + swsusp_resume(); - pr_debug("PM: Restoring saved image.\n"); - swsusp_resume(); - pr_debug("PM: Restore failed, recovering.n"); + enable_nonboot_cpus(); + Free: + swsusp_free(); + platform_finish(); device_resume(); resume_console(); Thaw: + printk(KERN_ERR "PM: Restore failed, recovering.\n"); unprepare_processes(); Done: /* For success case, the suspend path will release the lock */ diff --git a/kernel/power/main.c b/kernel/power/main.c index ff3a6182f5f..e1c41312046 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -20,6 +20,7 @@ #include <linux/cpu.h> #include <linux/resume-trace.h> #include <linux/freezer.h> +#include <linux/vmstat.h> #include "power.h" @@ -43,6 +44,11 @@ void pm_set_ops(struct pm_ops * ops) mutex_unlock(&pm_mutex); } +static inline void pm_finish(suspend_state_t state) +{ + if (pm_ops->finish) + pm_ops->finish(state); +} /** * suspend_prepare - Do prep work before entering low-power state. @@ -63,16 +69,13 @@ static int suspend_prepare(suspend_state_t state) pm_prepare_console(); - error = disable_nonboot_cpus(); - if (error) - goto Enable_cpu; - if (freeze_processes()) { error = -EAGAIN; goto Thaw; } - if ((free_pages = nr_free_pages()) < FREE_PAGE_NUMBER) { + if ((free_pages = global_page_state(NR_FREE_PAGES)) + < FREE_PAGE_NUMBER) { pr_debug("PM: free some memory\n"); shrink_all_memory(FREE_PAGE_NUMBER - free_pages); if (nr_free_pages() < FREE_PAGE_NUMBER) { @@ -88,18 +91,22 @@ static int suspend_prepare(suspend_state_t state) } suspend_console(); - if ((error = device_suspend(PMSG_SUSPEND))) { + error = device_suspend(PMSG_SUSPEND); + if (error) { printk(KERN_ERR "Some devices failed to suspend\n"); - goto Finish; + goto Resume_devices; } - return 0; - Finish: - if (pm_ops->finish) - pm_ops->finish(state); + error = disable_nonboot_cpus(); + if (!error) + return 0; + + enable_nonboot_cpus(); + Resume_devices: + pm_finish(state); + device_resume(); + resume_console(); Thaw: thaw_processes(); - Enable_cpu: - enable_nonboot_cpus(); pm_restore_console(); return error; } @@ -134,12 +141,11 @@ int suspend_enter(suspend_state_t state) static void suspend_finish(suspend_state_t state) { + enable_nonboot_cpus(); + pm_finish(state); device_resume(); resume_console(); thaw_processes(); - enable_nonboot_cpus(); - if (pm_ops && pm_ops->finish) - pm_ops->finish(state); pm_restore_console(); } diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index c024606221c..fc53ad06812 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -591,7 +591,7 @@ static unsigned int count_free_highmem_pages(void) for_each_zone(zone) if (populated_zone(zone) && is_highmem(zone)) - cnt += zone->free_pages; + cnt += zone_page_state(zone, NR_FREE_PAGES); return cnt; } @@ -869,7 +869,7 @@ static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) for_each_zone(zone) { meta += snapshot_additional_pages(zone); if (!is_highmem(zone)) - free += zone->free_pages; + free += zone_page_state(zone, NR_FREE_PAGES); } nr_pages += count_pages_for_highmem(nr_highmem); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 31aa0390c77..7fb834397a0 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -230,9 +230,10 @@ int swsusp_shrink_memory(void) for_each_zone (zone) if (populated_zone(zone)) { if (is_highmem(zone)) { - highmem_size -= zone->free_pages; + highmem_size -= + zone_page_state(zone, NR_FREE_PAGES); } else { - tmp -= zone->free_pages; + tmp -= zone_page_state(zone, NR_FREE_PAGES); tmp += zone->lowmem_reserve[ZONE_NORMAL]; tmp += snapshot_additional_pages(zone); } diff --git a/kernel/power/user.c b/kernel/power/user.c index f7b7a785a5c..dd09efe7df5 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -37,6 +37,7 @@ static struct snapshot_data { int mode; char frozen; char ready; + char platform_suspend; } snapshot_state; static atomic_t device_available = ATOMIC_INIT(1); @@ -66,6 +67,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->bitmap = NULL; data->frozen = 0; data->ready = 0; + data->platform_suspend = 0; return 0; } @@ -122,6 +124,92 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, return res; } +static inline int platform_prepare(void) +{ + int error = 0; + + if (pm_ops && pm_ops->prepare) + error = pm_ops->prepare(PM_SUSPEND_DISK); + + return error; +} + +static inline void platform_finish(void) +{ + if (pm_ops && pm_ops->finish) + pm_ops->finish(PM_SUSPEND_DISK); +} + +static inline int snapshot_suspend(int platform_suspend) +{ + int error; + + mutex_lock(&pm_mutex); + /* Free memory before shutting down devices. */ + error = swsusp_shrink_memory(); + if (error) + goto Finish; + + if (platform_suspend) { + error = platform_prepare(); + if (error) + goto Finish; + } + suspend_console(); + error = device_suspend(PMSG_FREEZE); + if (error) + goto Resume_devices; + + error = disable_nonboot_cpus(); + if (!error) { + in_suspend = 1; + error = swsusp_suspend(); + } + enable_nonboot_cpus(); + Resume_devices: + if (platform_suspend) + platform_finish(); + + device_resume(); + resume_console(); + Finish: + mutex_unlock(&pm_mutex); + return error; +} + +static inline int snapshot_restore(int platform_suspend) +{ + int error; + + mutex_lock(&pm_mutex); + pm_prepare_console(); + if (platform_suspend) { + error = platform_prepare(); + if (error) + goto Finish; + } + suspend_console(); + error = device_suspend(PMSG_PRETHAW); + if (error) + goto Resume_devices; + + error = disable_nonboot_cpus(); + if (!error) + error = swsusp_resume(); + + enable_nonboot_cpus(); + Resume_devices: + if (platform_suspend) + platform_finish(); + + device_resume(); + resume_console(); + Finish: + pm_restore_console(); + mutex_unlock(&pm_mutex); + return error; +} + static int snapshot_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { @@ -145,14 +233,9 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, if (data->frozen) break; mutex_lock(&pm_mutex); - error = disable_nonboot_cpus(); - if (!error) { - error = freeze_processes(); - if (error) { - thaw_processes(); - enable_nonboot_cpus(); - error = -EBUSY; - } + if (freeze_processes()) { + thaw_processes(); + error = -EBUSY; } mutex_unlock(&pm_mutex); if (!error) @@ -164,7 +247,6 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; mutex_lock(&pm_mutex); thaw_processes(); - enable_nonboot_cpus(); mutex_unlock(&pm_mutex); data->frozen = 0; break; @@ -174,20 +256,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } - mutex_lock(&pm_mutex); - /* Free memory before shutting down devices. */ - error = swsusp_shrink_memory(); - if (!error) { - suspend_console(); - error = device_suspend(PMSG_FREEZE); - if (!error) { - in_suspend = 1; - error = swsusp_suspend(); - device_resume(); - } - resume_console(); - } - mutex_unlock(&pm_mutex); + error = snapshot_suspend(data->platform_suspend); if (!error) error = put_user(in_suspend, (unsigned int __user *)arg); if (!error) @@ -201,17 +270,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } - mutex_lock(&pm_mutex); - pm_prepare_console(); - suspend_console(); - error = device_suspend(PMSG_PRETHAW); - if (!error) { - error = swsusp_resume(); - device_resume(); - } - resume_console(); - pm_restore_console(); - mutex_unlock(&pm_mutex); + error = snapshot_restore(data->platform_suspend); break; case SNAPSHOT_FREE: @@ -282,6 +341,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; case SNAPSHOT_S2RAM: + if (!pm_ops) { + error = -ENOSYS; + break; + } + if (!data->frozen) { error = -EPERM; break; @@ -319,28 +383,35 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; case SNAPSHOT_PMOPS: + error = -EINVAL; + switch (arg) { case PMOPS_PREPARE: - if (pm_ops->prepare) { - error = pm_ops->prepare(PM_SUSPEND_DISK); + if (pm_ops && pm_ops->enter) { + data->platform_suspend = 1; + error = 0; + } else { + error = -ENOSYS; } break; case PMOPS_ENTER: - kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); - error = pm_ops->enter(PM_SUSPEND_DISK); + if (data->platform_suspend) { + kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); + error = pm_ops->enter(PM_SUSPEND_DISK); + error = 0; + } break; case PMOPS_FINISH: - if (pm_ops && pm_ops->finish) { - pm_ops->finish(PM_SUSPEND_DISK); - } + if (data->platform_suspend) + error = 0; + break; default: printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg); - error = -EINVAL; } break; diff --git a/kernel/printk.c b/kernel/printk.c index c770e1a4e88..0c151877ff7 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -483,7 +483,7 @@ static int have_callable_console(void) * printk - print a kernel message * @fmt: format string * - * This is printk. It can be called from any context. We want it to work. + * This is printk(). It can be called from any context. We want it to work. * * We try to grab the console_sem. If we succeed, it's easy - we log the output and * call the console drivers. If we fail to get the semaphore we place the output @@ -529,7 +529,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) zap_locks(); /* This stops the holder of console_sem just where we want him */ - local_irq_save(flags); + raw_local_irq_save(flags); lockdep_off(); spin_lock(&logbuf_lock); printk_cpu = smp_processor_id(); @@ -618,7 +618,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) up(&console_sem); } lockdep_on(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } else { /* * Someone else owns the drivers. We drop the spinlock, which @@ -628,7 +628,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) printk_cpu = UINT_MAX; spin_unlock(&logbuf_lock); lockdep_on(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } preempt_enable(); @@ -783,6 +783,12 @@ int is_console_locked(void) return console_locked; } +void wake_up_klogd(void) +{ + if (!oops_in_progress && waitqueue_active(&log_wait)) + wake_up_interruptible(&log_wait); +} + /** * release_console_sem - unlock the console system * @@ -825,8 +831,8 @@ void release_console_sem(void) console_locked = 0; up(&console_sem); spin_unlock_irqrestore(&logbuf_lock, flags); - if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) - wake_up_interruptible(&log_wait); + if (wake_klogd) + wake_up_klogd(); } EXPORT_SYMBOL(release_console_sem); diff --git a/kernel/profile.c b/kernel/profile.c index d6579d51106..9bfadb248dd 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -449,7 +449,6 @@ void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir) /* create /proc/irq/prof_cpu_mask */ if (!(entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir))) return; - entry->nlink = 1; entry->data = (void *)&prof_cpu_mask; entry->read_proc = prof_cpu_mask_read_proc; entry->write_proc = prof_cpu_mask_write_proc; diff --git a/kernel/relay.c b/kernel/relay.c index 284e2e8b4ee..ef8a935710a 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -7,6 +7,8 @@ * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) * * Moved to kernel/relay.c by Paul Mundt, 2006. + * November 2006 - CPU hotplug support by Mathieu Desnoyers + * (mathieu.desnoyers@polymtl.ca) * * This file is released under the GPL. */ @@ -18,6 +20,11 @@ #include <linux/relay.h> #include <linux/vmalloc.h> #include <linux/mm.h> +#include <linux/cpu.h> + +/* list of open channels, for cpu hotplug */ +static DEFINE_MUTEX(relay_channels_mutex); +static LIST_HEAD(relay_channels); /* * close() vm_op implementation for relay file mapping. @@ -187,6 +194,7 @@ void relay_destroy_buf(struct rchan_buf *buf) __free_page(buf->page_array[i]); kfree(buf->page_array); } + chan->buf[buf->cpu] = NULL; kfree(buf->padding); kfree(buf); kref_put(&chan->kref, relay_destroy_channel); @@ -320,7 +328,7 @@ static void wakeup_readers(struct work_struct *work) * @buf: the channel buffer * @init: 1 if this is a first-time initialization * - * See relay_reset for description of effect. + * See relay_reset() for description of effect. */ static void __relay_reset(struct rchan_buf *buf, unsigned int init) { @@ -356,57 +364,75 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) * and restarting the channel in its initial state. The buffers * are not freed, so any mappings are still in effect. * - * NOTE: Care should be taken that the channel isn't actually + * NOTE. Care should be taken that the channel isn't actually * being used by anything when this call is made. */ void relay_reset(struct rchan *chan) { unsigned int i; - struct rchan_buf *prev = NULL; if (!chan) return; - for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i] || chan->buf[i] == prev) - break; - __relay_reset(chan->buf[i], 0); - prev = chan->buf[i]; + if (chan->is_global && chan->buf[0]) { + __relay_reset(chan->buf[0], 0); + return; } + + mutex_lock(&relay_channels_mutex); + for_each_online_cpu(i) + if (chan->buf[i]) + __relay_reset(chan->buf[i], 0); + mutex_unlock(&relay_channels_mutex); } EXPORT_SYMBOL_GPL(relay_reset); /* * relay_open_buf - create a new relay channel buffer * - * Internal - used by relay_open(). + * used by relay_open() and CPU hotplug. */ -static struct rchan_buf *relay_open_buf(struct rchan *chan, - const char *filename, - struct dentry *parent, - int *is_global) +static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) { - struct rchan_buf *buf; + struct rchan_buf *buf = NULL; struct dentry *dentry; + char *tmpname; - if (*is_global) + if (chan->is_global) return chan->buf[0]; + tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL); + if (!tmpname) + goto end; + snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu); + buf = relay_create_buf(chan); if (!buf) - return NULL; + goto free_name; + + buf->cpu = cpu; + __relay_reset(buf, 1); /* Create file in fs */ - dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR, - buf, is_global); - if (!dentry) { - relay_destroy_buf(buf); - return NULL; - } + dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR, + buf, &chan->is_global); + if (!dentry) + goto free_buf; buf->dentry = dentry; - __relay_reset(buf, 1); + if(chan->is_global) { + chan->buf[0] = buf; + buf->cpu = 0; + } + + goto free_name; + +free_buf: + relay_destroy_buf(buf); +free_name: + kfree(tmpname); +end: return buf; } @@ -448,31 +474,71 @@ static void setup_callbacks(struct rchan *chan, } /** + * + * relay_hotcpu_callback - CPU hotplug callback + * @nb: notifier block + * @action: hotplug action to take + * @hcpu: CPU number + * + * Returns the success/failure of the operation. (NOTIFY_OK, NOTIFY_BAD) + */ +static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, + unsigned long action, + void *hcpu) +{ + unsigned int hotcpu = (unsigned long)hcpu; + struct rchan *chan; + + switch(action) { + case CPU_UP_PREPARE: + mutex_lock(&relay_channels_mutex); + list_for_each_entry(chan, &relay_channels, list) { + if (chan->buf[hotcpu]) + continue; + chan->buf[hotcpu] = relay_open_buf(chan, hotcpu); + if(!chan->buf[hotcpu]) { + printk(KERN_ERR + "relay_hotcpu_callback: cpu %d buffer " + "creation failed\n", hotcpu); + mutex_unlock(&relay_channels_mutex); + return NOTIFY_BAD; + } + } + mutex_unlock(&relay_channels_mutex); + break; + case CPU_DEAD: + /* No need to flush the cpu : will be flushed upon + * final relay_flush() call. */ + break; + } + return NOTIFY_OK; +} + +/** * relay_open - create a new relay channel * @base_filename: base name of files to create * @parent: dentry of parent directory, %NULL for root directory * @subbuf_size: size of sub-buffers * @n_subbufs: number of sub-buffers * @cb: client callback functions + * @private_data: user-defined data * * Returns channel pointer if successful, %NULL otherwise. * * Creates a channel buffer for each cpu using the sizes and * attributes specified. The created channel buffer files * will be named base_filename0...base_filenameN-1. File - * permissions will be S_IRUSR. + * permissions will be %S_IRUSR. */ struct rchan *relay_open(const char *base_filename, struct dentry *parent, size_t subbuf_size, size_t n_subbufs, - struct rchan_callbacks *cb) + struct rchan_callbacks *cb, + void *private_data) { unsigned int i; struct rchan *chan; - char *tmpname; - int is_global = 0; - if (!base_filename) return NULL; @@ -487,38 +553,32 @@ struct rchan *relay_open(const char *base_filename, chan->n_subbufs = n_subbufs; chan->subbuf_size = subbuf_size; chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs); + chan->parent = parent; + chan->private_data = private_data; + strlcpy(chan->base_filename, base_filename, NAME_MAX); setup_callbacks(chan, cb); kref_init(&chan->kref); - tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL); - if (!tmpname) - goto free_chan; - + mutex_lock(&relay_channels_mutex); for_each_online_cpu(i) { - sprintf(tmpname, "%s%d", base_filename, i); - chan->buf[i] = relay_open_buf(chan, tmpname, parent, - &is_global); + chan->buf[i] = relay_open_buf(chan, i); if (!chan->buf[i]) goto free_bufs; - - chan->buf[i]->cpu = i; } + list_add(&chan->list, &relay_channels); + mutex_unlock(&relay_channels_mutex); - kfree(tmpname); return chan; free_bufs: - for (i = 0; i < NR_CPUS; i++) { + for_each_online_cpu(i) { if (!chan->buf[i]) break; relay_close_buf(chan->buf[i]); - if (is_global) - break; } - kfree(tmpname); -free_chan: kref_put(&chan->kref, relay_destroy_channel); + mutex_unlock(&relay_channels_mutex); return NULL; } EXPORT_SYMBOL_GPL(relay_open); @@ -588,7 +648,7 @@ EXPORT_SYMBOL_GPL(relay_switch_subbuf); * subbufs_consumed should be the number of sub-buffers newly consumed, * not the total consumed. * - * NOTE: Kernel clients don't need to call this function if the channel + * NOTE. Kernel clients don't need to call this function if the channel * mode is 'overwrite'. */ void relay_subbufs_consumed(struct rchan *chan, @@ -619,24 +679,26 @@ EXPORT_SYMBOL_GPL(relay_subbufs_consumed); void relay_close(struct rchan *chan) { unsigned int i; - struct rchan_buf *prev = NULL; if (!chan) return; - for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i] || chan->buf[i] == prev) - break; - relay_close_buf(chan->buf[i]); - prev = chan->buf[i]; - } + mutex_lock(&relay_channels_mutex); + if (chan->is_global && chan->buf[0]) + relay_close_buf(chan->buf[0]); + else + for_each_possible_cpu(i) + if (chan->buf[i]) + relay_close_buf(chan->buf[i]); if (chan->last_toobig) printk(KERN_WARNING "relay: one or more items not logged " "[item size (%Zd) > sub-buffer size (%Zd)]\n", chan->last_toobig, chan->subbuf_size); + list_del(&chan->list); kref_put(&chan->kref, relay_destroy_channel); + mutex_unlock(&relay_channels_mutex); } EXPORT_SYMBOL_GPL(relay_close); @@ -649,17 +711,20 @@ EXPORT_SYMBOL_GPL(relay_close); void relay_flush(struct rchan *chan) { unsigned int i; - struct rchan_buf *prev = NULL; if (!chan) return; - for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i] || chan->buf[i] == prev) - break; - relay_switch_subbuf(chan->buf[i], 0); - prev = chan->buf[i]; + if (chan->is_global && chan->buf[0]) { + relay_switch_subbuf(chan->buf[0], 0); + return; } + + mutex_lock(&relay_channels_mutex); + for_each_possible_cpu(i) + if (chan->buf[i]) + relay_switch_subbuf(chan->buf[i], 0); + mutex_unlock(&relay_channels_mutex); } EXPORT_SYMBOL_GPL(relay_flush); @@ -684,7 +749,7 @@ static int relay_file_open(struct inode *inode, struct file *filp) * @filp: the file * @vma: the vma describing what to map * - * Calls upon relay_mmap_buf to map the file into user space. + * Calls upon relay_mmap_buf() to map the file into user space. */ static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma) { @@ -826,7 +891,7 @@ static size_t relay_file_read_subbuf_avail(size_t read_pos, * @read_pos: file read position * @buf: relay channel buffer * - * If the read_pos is in the middle of padding, return the + * If the @read_pos is in the middle of padding, return the * position of the first actually available byte, otherwise * return the original value. */ @@ -1022,3 +1087,12 @@ const struct file_operations relay_file_operations = { .sendfile = relay_file_sendfile, }; EXPORT_SYMBOL_GPL(relay_file_operations); + +static __init int relay_init(void) +{ + + hotcpu_notifier(relay_hotcpu_callback, 0); + return 0; +} + +module_init(relay_init); diff --git a/kernel/resource.c b/kernel/resource.c index 7b9a497419d..2a3f8863658 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -17,6 +17,7 @@ #include <linux/fs.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/device.h> #include <asm/io.h> @@ -618,6 +619,67 @@ void __release_region(struct resource *parent, resource_size_t start, EXPORT_SYMBOL(__release_region); /* + * Managed region resource + */ +struct region_devres { + struct resource *parent; + resource_size_t start; + resource_size_t n; +}; + +static void devm_region_release(struct device *dev, void *res) +{ + struct region_devres *this = res; + + __release_region(this->parent, this->start, this->n); +} + +static int devm_region_match(struct device *dev, void *res, void *match_data) +{ + struct region_devres *this = res, *match = match_data; + + return this->parent == match->parent && + this->start == match->start && this->n == match->n; +} + +struct resource * __devm_request_region(struct device *dev, + struct resource *parent, resource_size_t start, + resource_size_t n, const char *name) +{ + struct region_devres *dr = NULL; + struct resource *res; + + dr = devres_alloc(devm_region_release, sizeof(struct region_devres), + GFP_KERNEL); + if (!dr) + return NULL; + + dr->parent = parent; + dr->start = start; + dr->n = n; + + res = __request_region(parent, start, n, name); + if (res) + devres_add(dev, dr); + else + devres_free(dr); + + return res; +} +EXPORT_SYMBOL(__devm_request_region); + +void __devm_release_region(struct device *dev, struct resource *parent, + resource_size_t start, resource_size_t n) +{ + struct region_devres match_data = { parent, start, n }; + + __release_region(parent, start, n); + WARN_ON(devres_destroy(dev, devm_region_release, devm_region_match, + &match_data)); +} +EXPORT_SYMBOL(__devm_release_region); + +/* * Called from init/main.c to reserve IO ports. */ #define MAXRESERVE 4 diff --git a/kernel/sched.c b/kernel/sched.c index cca93cc0dd7..08f86178aa3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -57,6 +57,16 @@ #include <asm/unistd.h> /* + * Scheduler clock - returns current time in nanosec units. + * This is default implementation. + * Architectures and sub-architectures can override this. + */ +unsigned long long __attribute__((weak)) sched_clock(void) +{ + return (unsigned long long)jiffies * (1000000000 / HZ); +} + +/* * Convert user-nice values [ -20 ... 0 ... 19 ] * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], * and back. @@ -2887,14 +2897,16 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) static void update_load(struct rq *this_rq) { unsigned long this_load; - int i, scale; + unsigned int i, scale; this_load = this_rq->raw_weighted_load; /* Update our load: */ - for (i = 0, scale = 1; i < 3; i++, scale <<= 1) { + for (i = 0, scale = 1; i < 3; i++, scale += scale) { unsigned long old_load, new_load; + /* scale is effectively 1 << i now, and >> i divides by scale */ + old_load = this_rq->cpu_load[i]; new_load = this_load; /* @@ -2904,7 +2916,7 @@ static void update_load(struct rq *this_rq) */ if (new_load > old_load) new_load += scale-1; - this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale; + this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i; } } @@ -4193,13 +4205,12 @@ static void __setscheduler(struct task_struct *p, int policy, int prio) } /** - * sched_setscheduler - change the scheduling policy and/or RT priority of - * a thread. + * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. * @p: the task in question. * @policy: new policy. * @param: structure containing the new RT priority. * - * NOTE: the task may be already dead + * NOTE that the task may be already dead. */ int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param) @@ -4567,7 +4578,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, /** * sys_sched_yield - yield the current processor to other threads. * - * this function yields the current CPU by moving the calling thread + * This function yields the current CPU by moving the calling thread * to the expired array. If there are no other threads running on this * CPU then this function will return. */ @@ -4694,7 +4705,7 @@ EXPORT_SYMBOL(cond_resched_softirq); /** * yield - yield the current processor to other threads. * - * this is a shortcut for kernel-space yielding - it marks the + * This is a shortcut for kernel-space yielding - it marks the * thread runnable and calls sys_sched_yield(). */ void __sched yield(void) diff --git a/kernel/signal.c b/kernel/signal.c index 5630255d2e2..8072e568bbe 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1096,42 +1096,21 @@ int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp) return retval; } -int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) -{ - if (pgrp <= 0) - return -EINVAL; - - return __kill_pgrp_info(sig, info, find_pid(pgrp)); -} - -int -kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) -{ - int retval; - - read_lock(&tasklist_lock); - retval = __kill_pg_info(sig, info, pgrp); - read_unlock(&tasklist_lock); - - return retval; -} - int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) { int error; - int acquired_tasklist_lock = 0; struct task_struct *p; rcu_read_lock(); - if (unlikely(sig_needs_tasklist(sig))) { + if (unlikely(sig_needs_tasklist(sig))) read_lock(&tasklist_lock); - acquired_tasklist_lock = 1; - } + p = pid_task(pid, PIDTYPE_PID); error = -ESRCH; if (p) error = group_send_sig_info(sig, info, p); - if (unlikely(acquired_tasklist_lock)) + + if (unlikely(sig_needs_tasklist(sig))) read_unlock(&tasklist_lock); rcu_read_unlock(); return error; @@ -1192,8 +1171,10 @@ EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); static int kill_something_info(int sig, struct siginfo *info, int pid) { + int ret; + rcu_read_lock(); if (!pid) { - return kill_pg_info(sig, info, process_group(current)); + ret = kill_pgrp_info(sig, info, task_pgrp(current)); } else if (pid == -1) { int retval = 0, count = 0; struct task_struct * p; @@ -1208,12 +1189,14 @@ static int kill_something_info(int sig, struct siginfo *info, int pid) } } read_unlock(&tasklist_lock); - return count ? retval : -ESRCH; + ret = count ? retval : -ESRCH; } else if (pid < 0) { - return kill_pg_info(sig, info, -pid); + ret = kill_pgrp_info(sig, info, find_pid(-pid)); } else { - return kill_proc_info(sig, info, pid); + ret = kill_pid_info(sig, info, find_pid(pid)); } + rcu_read_unlock(); + return ret; } /* @@ -1312,12 +1295,6 @@ int kill_pid(struct pid *pid, int sig, int priv) EXPORT_SYMBOL(kill_pid); int -kill_pg(pid_t pgrp, int sig, int priv) -{ - return kill_pg_info(sig, __si_special(priv), pgrp); -} - -int kill_proc(pid_t pid, int sig, int priv) { return kill_proc_info(sig, __si_special(priv), pid); @@ -1906,7 +1883,7 @@ relock: /* signals can be posted during this window */ - if (is_orphaned_pgrp(process_group(current))) + if (is_current_pgrp_orphaned()) goto relock; spin_lock_irq(¤t->sighand->siglock); @@ -1956,7 +1933,6 @@ EXPORT_SYMBOL(recalc_sigpending); EXPORT_SYMBOL_GPL(dequeue_signal); EXPORT_SYMBOL(flush_signals); EXPORT_SYMBOL(force_sig); -EXPORT_SYMBOL(kill_pg); EXPORT_SYMBOL(kill_proc); EXPORT_SYMBOL(ptrace_notify); EXPORT_SYMBOL(send_sig); @@ -2283,7 +2259,7 @@ static int do_tkill(int tgid, int pid, int sig) * @pid: the PID of the thread * @sig: signal to be sent * - * This syscall also checks the tgid and returns -ESRCH even if the PID + * This syscall also checks the @tgid and returns -ESRCH even if the PID * exists but it's not belonging to the target process anymore. This * method solves the problem of threads exiting and PIDs getting reused. */ diff --git a/kernel/sys.c b/kernel/sys.c index 6e2101dec0f..123b165080e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -215,7 +215,7 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); * This routine uses RCU to synchronize with changes to the chain. * * If the return value of the notifier can be and'ed - * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain + * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain() * will return immediately, with the return value of * the notifier function which halted execution. * Otherwise the return value is the return value @@ -313,7 +313,7 @@ EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); * run in a process context, so they are allowed to block. * * If the return value of the notifier can be and'ed - * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain + * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain() * will return immediately, with the return value of * the notifier function which halted execution. * Otherwise the return value is the return value @@ -393,7 +393,7 @@ EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); * All locking must be provided by the caller. * * If the return value of the notifier can be and'ed - * with %NOTIFY_STOP_MASK then raw_notifier_call_chain + * with %NOTIFY_STOP_MASK then raw_notifier_call_chain() * will return immediately, with the return value of * the notifier function which halted execution. * Otherwise the return value is the return value @@ -487,7 +487,7 @@ EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); * run in a process context, so they are allowed to block. * * If the return value of the notifier can be and'ed - * with %NOTIFY_STOP_MASK then srcu_notifier_call_chain + * with %NOTIFY_STOP_MASK then srcu_notifier_call_chain() * will return immediately, with the return value of * the notifier function which halted execution. * Otherwise the return value is the return value @@ -538,7 +538,7 @@ EXPORT_SYMBOL_GPL(srcu_init_notifier_head); * Registers a function with the list of functions * to be called at reboot time. * - * Currently always returns zero, as blocking_notifier_chain_register + * Currently always returns zero, as blocking_notifier_chain_register() * always returns zero. */ @@ -596,6 +596,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) struct task_struct *g, *p; struct user_struct *user; int error = -EINVAL; + struct pid *pgrp; if (which > 2 || which < 0) goto out; @@ -610,18 +611,21 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) read_lock(&tasklist_lock); switch (which) { case PRIO_PROCESS: - if (!who) - who = current->pid; - p = find_task_by_pid(who); + if (who) + p = find_task_by_pid(who); + else + p = current; if (p) error = set_one_prio(p, niceval, error); break; case PRIO_PGRP: - if (!who) - who = process_group(current); - do_each_task_pid(who, PIDTYPE_PGID, p) { + if (who) + pgrp = find_pid(who); + else + pgrp = task_pgrp(current); + do_each_pid_task(pgrp, PIDTYPE_PGID, p) { error = set_one_prio(p, niceval, error); - } while_each_task_pid(who, PIDTYPE_PGID, p); + } while_each_pid_task(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: user = current->user; @@ -656,6 +660,7 @@ asmlinkage long sys_getpriority(int which, int who) struct task_struct *g, *p; struct user_struct *user; long niceval, retval = -ESRCH; + struct pid *pgrp; if (which > 2 || which < 0) return -EINVAL; @@ -663,9 +668,10 @@ asmlinkage long sys_getpriority(int which, int who) read_lock(&tasklist_lock); switch (which) { case PRIO_PROCESS: - if (!who) - who = current->pid; - p = find_task_by_pid(who); + if (who) + p = find_task_by_pid(who); + else + p = current; if (p) { niceval = 20 - task_nice(p); if (niceval > retval) @@ -673,13 +679,15 @@ asmlinkage long sys_getpriority(int which, int who) } break; case PRIO_PGRP: - if (!who) - who = process_group(current); - do_each_task_pid(who, PIDTYPE_PGID, p) { + if (who) + pgrp = find_pid(who); + else + pgrp = task_pgrp(current); + do_each_pid_task(pgrp, PIDTYPE_PGID, p) { niceval = 20 - task_nice(p); if (niceval > retval) retval = niceval; - } while_each_task_pid(who, PIDTYPE_PGID, p); + } while_each_pid_task(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: user = current->user; @@ -1388,7 +1396,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) if (p->real_parent == group_leader) { err = -EPERM; - if (process_session(p) != process_session(group_leader)) + if (task_session(p) != task_session(group_leader)) goto out; err = -EACCES; if (p->did_exec) @@ -1407,7 +1415,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) struct task_struct *g = find_task_by_pid_type(PIDTYPE_PGID, pgid); - if (!g || process_session(g) != process_session(group_leader)) + if (!g || task_session(g) != task_session(group_leader)) goto out; } @@ -1510,7 +1518,6 @@ asmlinkage long sys_setsid(void) spin_lock(&group_leader->sighand->siglock); group_leader->signal->tty = NULL; - group_leader->signal->tty_old_pgrp = 0; spin_unlock(&group_leader->sighand->siglock); err = process_group(group_leader); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 600b33358de..e0ac6cd79fc 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -151,6 +151,8 @@ static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, #ifdef CONFIG_PROC_SYSCTL static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); #endif static ctl_table root_table[]; @@ -174,6 +176,7 @@ extern ctl_table inotify_table[]; int sysctl_legacy_va_layout; #endif + static void *get_uts(ctl_table *table, int write) { char *which = table->data; @@ -344,14 +347,16 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dostring, .strategy = &sysctl_string, }, +#ifdef CONFIG_PROC_SYSCTL { .ctl_name = KERN_TAINTED, .procname = "tainted", .data = &tainted, .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec, + .mode = 0644, + .proc_handler = &proc_dointvec_taint, }, +#endif { .ctl_name = KERN_CAP_BSET, .procname = "cap-bound", @@ -1681,13 +1686,12 @@ static int _proc_do_string(void* data, int maxlen, int write, size_t len; char __user *p; char c; - - if (!data || !maxlen || !*lenp || - (*ppos && !write)) { + + if (!data || !maxlen || !*lenp) { *lenp = 0; return 0; } - + if (write) { len = 0; p = buffer; @@ -1708,6 +1712,15 @@ static int _proc_do_string(void* data, int maxlen, int write, len = strlen(data); if (len > maxlen) len = maxlen; + + if (*ppos > len) { + *lenp = 0; + return 0; + } + + data += *ppos; + len -= *ppos; + if (len > *lenp) len = *lenp; if (len) @@ -1927,6 +1940,7 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp, #define OP_SET 0 #define OP_AND 1 +#define OP_OR 2 static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, int *valp, @@ -1938,6 +1952,7 @@ static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp, switch(op) { case OP_SET: *valp = val; break; case OP_AND: *valp &= val; break; + case OP_OR: *valp |= val; break; } } else { int val = *valp; @@ -1961,7 +1976,7 @@ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, { int op; - if (!capable(CAP_SYS_MODULE)) { + if (write && !capable(CAP_SYS_MODULE)) { return -EPERM; } @@ -1970,6 +1985,22 @@ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, do_proc_dointvec_bset_conv,&op); } +/* + * Taint values can only be increased + */ +static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int op; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + op = OP_OR; + return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, + do_proc_dointvec_bset_conv,&op); +} + struct do_proc_dointvec_minmax_conv_param { int *min; int *max; @@ -2553,17 +2584,23 @@ int sysctl_jiffies(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { - if (oldval) { + if (oldval && oldlenp) { size_t olen; - if (oldlenp) { - if (get_user(olen, oldlenp)) + + if (get_user(olen, oldlenp)) + return -EFAULT; + if (olen) { + int val; + + if (olen < sizeof(int)) + return -EINVAL; + + val = *(int *)(table->data) / HZ; + if (put_user(val, (int __user *)oldval)) + return -EFAULT; + if (put_user(sizeof(int), oldlenp)) return -EFAULT; - if (olen!=sizeof(int)) - return -EINVAL; } - if (put_user(*(int *)(table->data)/HZ, (int __user *)oldval) || - (oldlenp && put_user(sizeof(int),oldlenp))) - return -EFAULT; } if (newval && newlen) { int new; @@ -2581,17 +2618,23 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { - if (oldval) { + if (oldval && oldlenp) { size_t olen; - if (oldlenp) { - if (get_user(olen, oldlenp)) + + if (get_user(olen, oldlenp)) + return -EFAULT; + if (olen) { + int val; + + if (olen < sizeof(int)) + return -EINVAL; + + val = jiffies_to_msecs(*(int *)(table->data)); + if (put_user(val, (int __user *)oldval)) + return -EFAULT; + if (put_user(sizeof(int), oldlenp)) return -EFAULT; - if (olen!=sizeof(int)) - return -EINVAL; } - if (put_user(jiffies_to_msecs(*(int *)(table->data)), (int __user *)oldval) || - (oldlenp && put_user(sizeof(int),oldlenp))) - return -EFAULT; } if (newval && newlen) { int new; @@ -2732,12 +2775,14 @@ static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen, { return -ENOSYS; } +#ifdef CONFIG_SYSVIPC static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { return -ENOSYS; } +#endif #endif /* CONFIG_SYSCTL_SYSCALL */ /* diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 22504afc0d3..d9ef176c4e0 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -28,6 +28,7 @@ #include <linux/sysdev.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ /* XXX - Would like a better way for initializing curr_clocksource */ extern struct clocksource clocksource_jiffies; diff --git a/kernel/timer.c b/kernel/timer.c index c2a8ccfc288..8533c379608 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -85,7 +85,7 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases; * @j: the time in (absolute) jiffies that should be rounded * @cpu: the processor number on which the timeout will happen * - * __round_jiffies rounds an absolute time in the future (in jiffies) + * __round_jiffies() rounds an absolute time in the future (in jiffies) * up or down to (approximately) full seconds. This is useful for timers * for which the exact time they fire does not matter too much, as long as * they fire approximately every X seconds. @@ -98,7 +98,7 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases; * processors firing at the exact same time, which could lead * to lock contention or spurious cache line bouncing. * - * The return value is the rounded version of the "j" parameter. + * The return value is the rounded version of the @j parameter. */ unsigned long __round_jiffies(unsigned long j, int cpu) { @@ -142,7 +142,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies); * @j: the time in (relative) jiffies that should be rounded * @cpu: the processor number on which the timeout will happen * - * __round_jiffies_relative rounds a time delta in the future (in jiffies) + * __round_jiffies_relative() rounds a time delta in the future (in jiffies) * up or down to (approximately) full seconds. This is useful for timers * for which the exact time they fire does not matter too much, as long as * they fire approximately every X seconds. @@ -155,7 +155,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies); * processors firing at the exact same time, which could lead * to lock contention or spurious cache line bouncing. * - * The return value is the rounded version of the "j" parameter. + * The return value is the rounded version of the @j parameter. */ unsigned long __round_jiffies_relative(unsigned long j, int cpu) { @@ -173,7 +173,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative); * round_jiffies - function to round jiffies to a full second * @j: the time in (absolute) jiffies that should be rounded * - * round_jiffies rounds an absolute time in the future (in jiffies) + * round_jiffies() rounds an absolute time in the future (in jiffies) * up or down to (approximately) full seconds. This is useful for timers * for which the exact time they fire does not matter too much, as long as * they fire approximately every X seconds. @@ -182,7 +182,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative); * at the same time, rather than at various times spread out. The goal * of this is to have the CPU wake up less, which saves power. * - * The return value is the rounded version of the "j" parameter. + * The return value is the rounded version of the @j parameter. */ unsigned long round_jiffies(unsigned long j) { @@ -194,7 +194,7 @@ EXPORT_SYMBOL_GPL(round_jiffies); * round_jiffies_relative - function to round jiffies to a full second * @j: the time in (relative) jiffies that should be rounded * - * round_jiffies_relative rounds a time delta in the future (in jiffies) + * round_jiffies_relative() rounds a time delta in the future (in jiffies) * up or down to (approximately) full seconds. This is useful for timers * for which the exact time they fire does not matter too much, as long as * they fire approximately every X seconds. @@ -203,7 +203,7 @@ EXPORT_SYMBOL_GPL(round_jiffies); * at the same time, rather than at various times spread out. The goal * of this is to have the CPU wake up less, which saves power. * - * The return value is the rounded version of the "j" parameter. + * The return value is the rounded version of the @j parameter. */ unsigned long round_jiffies_relative(unsigned long j) { @@ -387,7 +387,7 @@ void add_timer_on(struct timer_list *timer, int cpu) * @timer: the timer to be modified * @expires: new timeout in jiffies * - * mod_timer is a more efficient way to update the expire field of an + * mod_timer() is a more efficient way to update the expire field of an * active timer (if the timer is inactive it will be activated) * * mod_timer(timer, expires) is equivalent to: @@ -490,7 +490,7 @@ out: * the timer it also makes sure the handler has finished executing on other * CPUs. * - * Synchronization rules: callers must prevent restarting of the timer, + * Synchronization rules: Callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from * interrupt contexts. The caller must not hold locks which would prevent * completion of the timer's handler. The timer's handler must not call @@ -1392,17 +1392,16 @@ asmlinkage long sys_gettid(void) } /** - * sys_sysinfo - fill in sysinfo struct + * do_sysinfo - fill in sysinfo struct * @info: pointer to buffer to fill */ -asmlinkage long sys_sysinfo(struct sysinfo __user *info) +int do_sysinfo(struct sysinfo *info) { - struct sysinfo val; unsigned long mem_total, sav_total; unsigned int mem_unit, bitcount; unsigned long seq; - memset((char *)&val, 0, sizeof(struct sysinfo)); + memset(info, 0, sizeof(struct sysinfo)); do { struct timespec tp; @@ -1422,17 +1421,17 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; tp.tv_sec++; } - val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); + info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); - val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); - val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); - val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); + info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); + info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); + info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); - val.procs = nr_threads; + info->procs = nr_threads; } while (read_seqretry(&xtime_lock, seq)); - si_meminfo(&val); - si_swapinfo(&val); + si_meminfo(info); + si_swapinfo(info); /* * If the sum of all the available memory (i.e. ram + swap) @@ -1443,11 +1442,11 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) * -Erik Andersen <andersee@debian.org> */ - mem_total = val.totalram + val.totalswap; - if (mem_total < val.totalram || mem_total < val.totalswap) + mem_total = info->totalram + info->totalswap; + if (mem_total < info->totalram || mem_total < info->totalswap) goto out; bitcount = 0; - mem_unit = val.mem_unit; + mem_unit = info->mem_unit; while (mem_unit > 1) { bitcount++; mem_unit >>= 1; @@ -1459,22 +1458,31 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) /* * If mem_total did not overflow, multiply all memory values by - * val.mem_unit and set it to 1. This leaves things compatible + * info->mem_unit and set it to 1. This leaves things compatible * with 2.2.x, and also retains compatibility with earlier 2.4.x * kernels... */ - val.mem_unit = 1; - val.totalram <<= bitcount; - val.freeram <<= bitcount; - val.sharedram <<= bitcount; - val.bufferram <<= bitcount; - val.totalswap <<= bitcount; - val.freeswap <<= bitcount; - val.totalhigh <<= bitcount; - val.freehigh <<= bitcount; + info->mem_unit = 1; + info->totalram <<= bitcount; + info->freeram <<= bitcount; + info->sharedram <<= bitcount; + info->bufferram <<= bitcount; + info->totalswap <<= bitcount; + info->freeswap <<= bitcount; + info->totalhigh <<= bitcount; + info->freehigh <<= bitcount; + +out: + return 0; +} + +asmlinkage long sys_sysinfo(struct sysinfo __user *info) +{ + struct sysinfo val; + + do_sysinfo(&val); - out: if (copy_to_user(info, &val, sizeof(struct sysinfo))) return -EFAULT; @@ -1624,7 +1632,7 @@ struct time_interpolator *time_interpolator __read_mostly; static struct time_interpolator *time_interpolator_list __read_mostly; static DEFINE_SPINLOCK(time_interpolator_lock); -static inline u64 time_interpolator_get_cycles(unsigned int src) +static inline cycles_t time_interpolator_get_cycles(unsigned int src) { unsigned long (*x)(void); @@ -1650,8 +1658,8 @@ static inline u64 time_interpolator_get_counter(int writelock) if (time_interpolator->jitter) { - u64 lcycle; - u64 now; + cycles_t lcycle; + cycles_t now; do { lcycle = time_interpolator->last_cycle; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a3da07c5af2..020d1fff57d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -656,8 +656,7 @@ void flush_scheduled_work(void) EXPORT_SYMBOL(flush_scheduled_work); /** - * cancel_rearming_delayed_workqueue - reliably kill off a delayed - * work whose handler rearms the delayed work. + * cancel_rearming_delayed_workqueue - reliably kill off a delayed work whose handler rearms the delayed work. * @wq: the controlling workqueue structure * @dwork: the delayed work struct */ @@ -670,8 +669,7 @@ void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, EXPORT_SYMBOL(cancel_rearming_delayed_workqueue); /** - * cancel_rearming_delayed_work - reliably kill off a delayed keventd - * work whose handler rearms the delayed work. + * cancel_rearming_delayed_work - reliably kill off a delayed keventd work whose handler rearms the delayed work. * @dwork: the delayed work struct */ void cancel_rearming_delayed_work(struct delayed_work *dwork) |