Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/capability.c | 16
-rw-r--r-- | kernel/cpu.c | 3
-rw-r--r-- | kernel/cpuset.c | 152
-rw-r--r-- | kernel/exec_domain.c | 1
-rw-r--r-- | kernel/fork.c | 1
-rw-r--r-- | kernel/irq/Makefile | 3
-rw-r--r-- | kernel/irq/manage.c | 23
-rw-r--r-- | kernel/irq/migration.c | 65
-rw-r--r-- | kernel/itimer.c | 103
-rw-r--r-- | kernel/ksysfs.c | 4
-rw-r--r-- | kernel/kthread.c | 2
-rw-r--r-- | kernel/module.c | 202
-rw-r--r-- | kernel/params.c | 12
-rw-r--r-- | kernel/power/smp.c | 4
-rw-r--r-- | kernel/printk.c | 76
-rw-r--r-- | kernel/rcupdate.c | 5
-rw-r--r-- | kernel/rcutorture.c | 33
-rw-r--r-- | kernel/softlockup.c | 55
-rw-r--r-- | kernel/sys.c | 68
-rw-r--r-- | kernel/sysctl.c | 19
-rw-r--r-- | kernel/time.c | 59
-rw-r--r-- | kernel/timer.c | 74
-rw-r--r-- | kernel/user.c | 10
23 files changed, 549 insertions, 441 deletions
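
One of the larger changes below, in kernel/cpuset.c, introduces cpuset_mem_spread_node(): instead of always starting page-cache and slab allocations on the local node, tasks flagged PF_SPREAD_PAGE/PF_SPREAD_SLAB rotate the starting node across their mems_allowed. The following is a minimal userspace sketch of that round-robin rotor; MAX_NODES, allowed[] and mem_spread_node() are illustrative stand-ins only (in the kernel the mask is current->mems_allowed and the rotor is current->cpuset_mem_spread_rotor), not kernel APIs.

#include <stdio.h>

#define MAX_NODES 8

static int rotor;                                   /* per-task in the kernel */
static const int allowed[MAX_NODES] = { 0, 1, 0, 1, 1, 0, 0, 1 };

static int mem_spread_node(void)
{
	int n = rotor;
	int i;

	/* find the next allowed node after the rotor, wrapping around */
	for (i = 0; i < MAX_NODES; i++) {
		n = (n + 1) % MAX_NODES;
		if (allowed[n]) {
			rotor = n;
			return n;
		}
	}
	return 0;                                   /* empty mask: fall back */
}

int main(void)
{
	int i;

	for (i = 0; i < 6; i++)
		printf("allocation %d starts on node %d\n", i, mem_spread_node());
	return 0;
}

Successive calls walk the allowed nodes 1, 3, 4, 7, 1, 3, ..., so large kernel-side caches (inode, dentry, buffer, slab pages) are spread over the cpuset's nodes rather than piling up on whichever node the task happens to run on.
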
diff --git a/kernel/capability.c b/kernel/capability.c index bfa3c92e16f..1a4d8a40d3f 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -233,3 +233,19 @@ out: return ret; } + +int __capable(struct task_struct *t, int cap) +{ + if (security_capable(t, cap) == 0) { + t->flags |= PF_SUPERPRIV; + return 1; + } + return 0; +} +EXPORT_SYMBOL(__capable); + +int capable(int cap) +{ + return __capable(current, cap); +} +EXPORT_SYMBOL(capable); diff --git a/kernel/cpu.c b/kernel/cpu.c index e882c6babf4..8be22bd8093 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -223,8 +223,7 @@ int __devinit cpu_up(unsigned int cpu) ret = __cpu_up(cpu); if (ret != 0) goto out_notify; - if (!cpu_online(cpu)) - BUG(); + BUG_ON(!cpu_online(cpu)); /* Now call notifier in preparation. */ notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c86ee051b73..18aea1bd128 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -4,15 +4,14 @@ * Processor and Memory placement constraints for sets of tasks. * * Copyright (C) 2003 BULL SA. - * Copyright (C) 2004 Silicon Graphics, Inc. + * Copyright (C) 2004-2006 Silicon Graphics, Inc. * * Portions derived from Patrick Mochel's sysfs code. * sysfs is Copyright (c) 2001-3 Patrick Mochel - * Portions Copyright (c) 2004 Silicon Graphics, Inc. * - * 2003-10-10 Written by Simon Derr <simon.derr@bull.net> + * 2003-10-10 Written by Simon Derr. * 2003-10-22 Updates by Stephen Hemminger. - * 2004 May-July Rework by Paul Jackson <pj@sgi.com> + * 2004 May-July Rework by Paul Jackson. * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of the Linux @@ -108,37 +107,49 @@ typedef enum { CS_MEM_EXCLUSIVE, CS_MEMORY_MIGRATE, CS_REMOVED, - CS_NOTIFY_ON_RELEASE + CS_NOTIFY_ON_RELEASE, + CS_SPREAD_PAGE, + CS_SPREAD_SLAB, } cpuset_flagbits_t; /* convenient tests for these bits */ static inline int is_cpu_exclusive(const struct cpuset *cs) { - return !!test_bit(CS_CPU_EXCLUSIVE, &cs->flags); + return test_bit(CS_CPU_EXCLUSIVE, &cs->flags); } static inline int is_mem_exclusive(const struct cpuset *cs) { - return !!test_bit(CS_MEM_EXCLUSIVE, &cs->flags); + return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); } static inline int is_removed(const struct cpuset *cs) { - return !!test_bit(CS_REMOVED, &cs->flags); + return test_bit(CS_REMOVED, &cs->flags); } static inline int notify_on_release(const struct cpuset *cs) { - return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); + return test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); } static inline int is_memory_migrate(const struct cpuset *cs) { - return !!test_bit(CS_MEMORY_MIGRATE, &cs->flags); + return test_bit(CS_MEMORY_MIGRATE, &cs->flags); +} + +static inline int is_spread_page(const struct cpuset *cs) +{ + return test_bit(CS_SPREAD_PAGE, &cs->flags); +} + +static inline int is_spread_slab(const struct cpuset *cs) +{ + return test_bit(CS_SPREAD_SLAB, &cs->flags); } /* - * Increment this atomic integer everytime any cpuset changes its + * Increment this integer everytime any cpuset changes its * mems_allowed value. Users of cpusets can track this generation * number, and avoid having to lock and reload mems_allowed unless * the cpuset they're using changes generation. @@ -152,8 +163,11 @@ static inline int is_memory_migrate(const struct cpuset *cs) * on every visit to __alloc_pages(), to efficiently check whether * its current->cpuset->mems_allowed has changed, requiring an update * of its current->mems_allowed. 
+ * + * Since cpuset_mems_generation is guarded by manage_mutex, + * there is no need to mark it atomic. */ -static atomic_t cpuset_mems_generation = ATOMIC_INIT(1); +static int cpuset_mems_generation; static struct cpuset top_cpuset = { .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), @@ -657,6 +671,14 @@ void cpuset_update_task_memory_state(void) cs = tsk->cpuset; /* Maybe changed when task not locked */ guarantee_online_mems(cs, &tsk->mems_allowed); tsk->cpuset_mems_generation = cs->mems_generation; + if (is_spread_page(cs)) + tsk->flags |= PF_SPREAD_PAGE; + else + tsk->flags &= ~PF_SPREAD_PAGE; + if (is_spread_slab(cs)) + tsk->flags |= PF_SPREAD_SLAB; + else + tsk->flags &= ~PF_SPREAD_SLAB; task_unlock(tsk); mutex_unlock(&callback_mutex); mpol_rebind_task(tsk, &tsk->mems_allowed); @@ -858,8 +880,7 @@ static int update_nodemask(struct cpuset *cs, char *buf) mutex_lock(&callback_mutex); cs->mems_allowed = trialcs.mems_allowed; - atomic_inc(&cpuset_mems_generation); - cs->mems_generation = atomic_read(&cpuset_mems_generation); + cs->mems_generation = cpuset_mems_generation++; mutex_unlock(&callback_mutex); set_cpuset_being_rebound(cs); /* causes mpol_copy() rebind */ @@ -957,7 +978,8 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) /* * update_flag - read a 0 or a 1 in a file and update associated flag * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, - * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE) + * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE, + * CS_SPREAD_PAGE, CS_SPREAD_SLAB) * cs: the cpuset to update * buf: the buffer where we read the 0 or 1 * @@ -1188,6 +1210,8 @@ typedef enum { FILE_NOTIFY_ON_RELEASE, FILE_MEMORY_PRESSURE_ENABLED, FILE_MEMORY_PRESSURE, + FILE_SPREAD_PAGE, + FILE_SPREAD_SLAB, FILE_TASKLIST, } cpuset_filetype_t; @@ -1247,6 +1271,14 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us case FILE_MEMORY_PRESSURE: retval = -EACCES; break; + case FILE_SPREAD_PAGE: + retval = update_flag(CS_SPREAD_PAGE, cs, buffer); + cs->mems_generation = cpuset_mems_generation++; + break; + case FILE_SPREAD_SLAB: + retval = update_flag(CS_SPREAD_SLAB, cs, buffer); + cs->mems_generation = cpuset_mems_generation++; + break; case FILE_TASKLIST: retval = attach_task(cs, buffer, &pathbuf); break; @@ -1356,6 +1388,12 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, case FILE_MEMORY_PRESSURE: s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter)); break; + case FILE_SPREAD_PAGE: + *s++ = is_spread_page(cs) ? '1' : '0'; + break; + case FILE_SPREAD_SLAB: + *s++ = is_spread_slab(cs) ? 
'1' : '0'; + break; default: retval = -EINVAL; goto out; @@ -1719,6 +1757,16 @@ static struct cftype cft_memory_pressure = { .private = FILE_MEMORY_PRESSURE, }; +static struct cftype cft_spread_page = { + .name = "memory_spread_page", + .private = FILE_SPREAD_PAGE, +}; + +static struct cftype cft_spread_slab = { + .name = "memory_spread_slab", + .private = FILE_SPREAD_SLAB, +}; + static int cpuset_populate_dir(struct dentry *cs_dentry) { int err; @@ -1737,6 +1785,10 @@ static int cpuset_populate_dir(struct dentry *cs_dentry) return err; if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0) return err; + if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0) + return err; + if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0) + return err; if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) return err; return 0; @@ -1765,13 +1817,16 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode) cs->flags = 0; if (notify_on_release(parent)) set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); + if (is_spread_page(parent)) + set_bit(CS_SPREAD_PAGE, &cs->flags); + if (is_spread_slab(parent)) + set_bit(CS_SPREAD_SLAB, &cs->flags); cs->cpus_allowed = CPU_MASK_NONE; cs->mems_allowed = NODE_MASK_NONE; atomic_set(&cs->count, 0); INIT_LIST_HEAD(&cs->sibling); INIT_LIST_HEAD(&cs->children); - atomic_inc(&cpuset_mems_generation); - cs->mems_generation = atomic_read(&cpuset_mems_generation); + cs->mems_generation = cpuset_mems_generation++; fmeter_init(&cs->fmeter); cs->parent = parent; @@ -1861,7 +1916,7 @@ int __init cpuset_init_early(void) struct task_struct *tsk = current; tsk->cpuset = &top_cpuset; - tsk->cpuset->mems_generation = atomic_read(&cpuset_mems_generation); + tsk->cpuset->mems_generation = cpuset_mems_generation++; return 0; } @@ -1880,8 +1935,7 @@ int __init cpuset_init(void) top_cpuset.mems_allowed = NODE_MASK_ALL; fmeter_init(&top_cpuset.fmeter); - atomic_inc(&cpuset_mems_generation); - top_cpuset.mems_generation = atomic_read(&cpuset_mems_generation); + top_cpuset.mems_generation = cpuset_mems_generation++; init_task.cpuset = &top_cpuset; @@ -1972,7 +2026,7 @@ void cpuset_fork(struct task_struct *child) * because tsk is already marked PF_EXITING, so attach_task() won't * mess with it, or task is a failed fork, never visible to attach_task. * - * Hack: + * the_top_cpuset_hack: * * Set the exiting tasks cpuset to the root cpuset (top_cpuset). * @@ -2011,7 +2065,7 @@ void cpuset_exit(struct task_struct *tsk) struct cpuset *cs; cs = tsk->cpuset; - tsk->cpuset = &top_cpuset; /* Hack - see comment above */ + tsk->cpuset = &top_cpuset; /* the_top_cpuset_hack - see above */ if (notify_on_release(cs)) { char *pathbuf = NULL; @@ -2151,7 +2205,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { int node; /* node that zone z is on */ const struct cpuset *cs; /* current cpuset ancestors */ - int allowed = 1; /* is allocation in zone z allowed? */ + int allowed; /* is allocation in zone z allowed? 
*/ if (in_interrupt()) return 1; @@ -2204,6 +2258,44 @@ void cpuset_unlock(void) } /** + * cpuset_mem_spread_node() - On which node to begin search for a page + * + * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for + * tasks in a cpuset with is_spread_page or is_spread_slab set), + * and if the memory allocation used cpuset_mem_spread_node() + * to determine on which node to start looking, as it will for + * certain page cache or slab cache pages such as used for file + * system buffers and inode caches, then instead of starting on the + * local node to look for a free page, rather spread the starting + * node around the tasks mems_allowed nodes. + * + * We don't have to worry about the returned node being offline + * because "it can't happen", and even if it did, it would be ok. + * + * The routines calling guarantee_online_mems() are careful to + * only set nodes in task->mems_allowed that are online. So it + * should not be possible for the following code to return an + * offline node. But if it did, that would be ok, as this routine + * is not returning the node where the allocation must be, only + * the node where the search should start. The zonelist passed to + * __alloc_pages() will include all nodes. If the slab allocator + * is passed an offline node, it will fall back to the local node. + * See kmem_cache_alloc_node(). + */ + +int cpuset_mem_spread_node(void) +{ + int node; + + node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed); + if (node == MAX_NUMNODES) + node = first_node(current->mems_allowed); + current->cpuset_mem_spread_rotor = node; + return node; +} +EXPORT_SYMBOL_GPL(cpuset_mem_spread_node); + +/** * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? * @p: pointer to task_struct of some other task. * @@ -2284,12 +2376,12 @@ void __cpuset_memory_pressure_bump(void) * - No need to task_lock(tsk) on this tsk->cpuset reference, as it * doesn't really matter if tsk->cpuset changes after we read it, * and we take manage_mutex, keeping attach_task() from changing it - * anyway. + * anyway. No need to check that tsk->cpuset != NULL, thanks to + * the_top_cpuset_hack in cpuset_exit(), which sets an exiting tasks + * cpuset to top_cpuset. 
*/ - static int proc_cpuset_show(struct seq_file *m, void *v) { - struct cpuset *cs; struct task_struct *tsk; char *buf; int retval = 0; @@ -2300,13 +2392,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v) tsk = m->private; mutex_lock(&manage_mutex); - cs = tsk->cpuset; - if (!cs) { - retval = -EINVAL; - goto out; - } - - retval = cpuset_path(cs, buf, PAGE_SIZE); + retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); if (retval < 0) goto out; seq_puts(m, buf); diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index 867d6dbeb57..c01cead2cfd 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -140,6 +140,7 @@ __set_personality(u_long personality) ep = lookup_exec_domain(personality); if (ep == current_thread_info()->exec_domain) { current->personality = personality; + module_put(ep->module); return 0; } diff --git a/kernel/fork.c b/kernel/fork.c index c21bae8c93b..a02063903aa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1021,6 +1021,7 @@ static task_t *copy_process(unsigned long clone_flags, p->mempolicy = NULL; goto bad_fork_cleanup_cpuset; } + mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_DEBUG_MUTEXES diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 49378738ff5..2b33f852be3 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -1,5 +1,4 @@ -obj-y := handle.o manage.o spurious.o +obj-y := handle.o manage.o spurious.o migration.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o - diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 97d5559997d..6edfcef291e 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -204,10 +204,14 @@ int setup_irq(unsigned int irq, struct irqaction * new) p = &desc->action; if ((old = *p) != NULL) { /* Can't share interrupts unless both agree to */ - if (!(old->flags & new->flags & SA_SHIRQ)) { - spin_unlock_irqrestore(&desc->lock,flags); - return -EBUSY; - } + if (!(old->flags & new->flags & SA_SHIRQ)) + goto mismatch; + +#if defined(ARCH_HAS_IRQ_PER_CPU) && defined(SA_PERCPU_IRQ) + /* All handlers must agree on per-cpuness */ + if ((old->flags & IRQ_PER_CPU) != (new->flags & IRQ_PER_CPU)) + goto mismatch; +#endif /* add new interrupt at end of irq queue */ do { @@ -218,7 +222,10 @@ int setup_irq(unsigned int irq, struct irqaction * new) } *p = new; - +#if defined(ARCH_HAS_IRQ_PER_CPU) && defined(SA_PERCPU_IRQ) + if (new->flags & SA_PERCPU_IRQ) + desc->status |= IRQ_PER_CPU; +#endif if (!shared) { desc->depth = 0; desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | @@ -236,6 +243,12 @@ int setup_irq(unsigned int irq, struct irqaction * new) register_handler_proc(irq, new); return 0; + +mismatch: + spin_unlock_irqrestore(&desc->lock, flags); + printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__); + dump_stack(); + return -EBUSY; } /** diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c new file mode 100644 index 00000000000..52a8655fa08 --- /dev/null +++ b/kernel/irq/migration.c @@ -0,0 +1,65 @@ +#include <linux/irq.h> + +#if defined(CONFIG_GENERIC_PENDING_IRQ) + +void set_pending_irq(unsigned int irq, cpumask_t mask) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + desc->move_irq = 1; + pending_irq_cpumask[irq] = mask; + spin_unlock_irqrestore(&desc->lock, flags); +} + +void move_native_irq(int irq) +{ + cpumask_t tmp; + irq_desc_t *desc = irq_descp(irq); + + if (likely(!desc->move_irq)) + return; + + /* + * Paranoia: cpu-local interrupts shouldn't be calling in here anyway. 
+ */ + if (CHECK_IRQ_PER_CPU(desc->status)) { + WARN_ON(1); + return; + } + + desc->move_irq = 0; + + if (likely(cpus_empty(pending_irq_cpumask[irq]))) + return; + + if (!desc->handler->set_affinity) + return; + + assert_spin_locked(&desc->lock); + + cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); + + /* + * If there was a valid mask to work with, please + * do the disable, re-program, enable sequence. + * This is *not* particularly important for level triggered + * but in a edge trigger case, we might be setting rte + * when an active trigger is comming in. This could + * cause some ioapics to mal-function. + * Being paranoid i guess! + */ + if (unlikely(!cpus_empty(tmp))) { + if (likely(!(desc->status & IRQ_DISABLED))) + desc->handler->disable(irq); + + desc->handler->set_affinity(irq,tmp); + + if (likely(!(desc->status & IRQ_DISABLED))) + desc->handler->enable(irq); + } + cpus_clear(pending_irq_cpumask[irq]); +} + +#endif diff --git a/kernel/itimer.c b/kernel/itimer.c index 379be2f8c84..680e6b70c87 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -143,6 +143,60 @@ int it_real_fn(void *data) return HRTIMER_NORESTART; } +/* + * We do not care about correctness. We just sanitize the values so + * the ktime_t operations which expect normalized values do not + * break. This converts negative values to long timeouts similar to + * the code in kernel versions < 2.6.16 + * + * Print a limited number of warning messages when an invalid timeval + * is detected. + */ +static void fixup_timeval(struct timeval *tv, int interval) +{ + static int warnlimit = 10; + unsigned long tmp; + + if (warnlimit > 0) { + warnlimit--; + printk(KERN_WARNING + "setitimer: %s (pid = %d) provided " + "invalid timeval %s: tv_sec = %ld tv_usec = %ld\n", + current->comm, current->pid, + interval ? "it_interval" : "it_value", + tv->tv_sec, (long) tv->tv_usec); + } + + tmp = tv->tv_usec; + if (tmp >= USEC_PER_SEC) { + tv->tv_usec = tmp % USEC_PER_SEC; + tv->tv_sec += tmp / USEC_PER_SEC; + } + + tmp = tv->tv_sec; + if (tmp > LONG_MAX) + tv->tv_sec = LONG_MAX; +} + +/* + * Returns true if the timeval is in canonical form + */ +#define timeval_valid(t) \ + (((t)->tv_sec >= 0) && (((unsigned long) (t)->tv_usec) < USEC_PER_SEC)) + +/* + * Check for invalid timevals, sanitize them and print a limited + * number of warnings. + */ +static void check_itimerval(struct itimerval *value) { + + if (unlikely(!timeval_valid(&value->it_value))) + fixup_timeval(&value->it_value, 0); + + if (unlikely(!timeval_valid(&value->it_interval))) + fixup_timeval(&value->it_interval, 1); +} + int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) { struct task_struct *tsk = current; @@ -150,6 +204,18 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) ktime_t expires; cputime_t cval, cinterval, nval, ninterval; + /* + * Validate the timevals in value. + * + * Note: Although the spec requires that invalid values shall + * return -EINVAL, we just fixup the value and print a limited + * number of warnings in order not to break users of this + * historical misfeature. + * + * Scheduled for replacement in March 2007 + */ + check_itimerval(value); + switch (which) { case ITIMER_REAL: again: @@ -226,6 +292,43 @@ again: return 0; } +/** + * alarm_setitimer - set alarm in seconds + * + * @seconds: number of seconds until alarm + * 0 disables the alarm + * + * Returns the remaining time in seconds of a pending timer or 0 when + * the timer is not active. 
+ * + * On 32 bit machines the seconds value is limited to (INT_MAX/2) to avoid + * negative timeval settings which would cause immediate expiry. + */ +unsigned int alarm_setitimer(unsigned int seconds) +{ + struct itimerval it_new, it_old; + +#if BITS_PER_LONG < 64 + if (seconds > INT_MAX) + seconds = INT_MAX; +#endif + it_new.it_value.tv_sec = seconds; + it_new.it_value.tv_usec = 0; + it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; + + do_setitimer(ITIMER_REAL, &it_new, &it_old); + + /* + * We can't return 0 if we have an alarm pending ... And we'd + * better return too much than too little anyway + */ + if ((!it_old.it_value.tv_sec && it_old.it_value.tv_usec) || + it_old.it_value.tv_usec >= 500000) + it_old.it_value.tv_sec++; + + return it_old.it_value.tv_sec; +} + asmlinkage long sys_setitimer(int which, struct itimerval __user *value, struct itimerval __user *ovalue) diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index f2690ed7453..f119e098e67 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -22,7 +22,7 @@ static struct subsys_attribute _name##_attr = __ATTR_RO(_name) static struct subsys_attribute _name##_attr = \ __ATTR(_name, 0644, _name##_show, _name##_store) -#ifdef CONFIG_HOTPLUG +#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) /* current uevent sequence number */ static ssize_t uevent_seqnum_show(struct subsystem *subsys, char *page) { @@ -52,7 +52,7 @@ decl_subsys(kernel, NULL, NULL); EXPORT_SYMBOL_GPL(kernel_subsys); static struct attribute * kernel_attrs[] = { -#ifdef CONFIG_HOTPLUG +#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) &uevent_seqnum_attr.attr, &uevent_helper_attr.attr, #endif diff --git a/kernel/kthread.c b/kernel/kthread.c index 6a5373868a9..c5f3c6613b6 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -115,7 +115,9 @@ static void keventd_create_kthread(void *_create) create->result = ERR_PTR(pid); } else { wait_for_completion(&create->started); + read_lock(&tasklist_lock); create->result = find_task_by_pid(pid); + read_unlock(&tasklist_lock); } complete(&create->done); } diff --git a/kernel/module.c b/kernel/module.c index fb404299082..ddfe45ac2fd 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -233,24 +233,6 @@ static unsigned long __find_symbol(const char *name, return 0; } -/* Find a symbol in this elf symbol table */ -static unsigned long find_local_symbol(Elf_Shdr *sechdrs, - unsigned int symindex, - const char *strtab, - const char *name) -{ - unsigned int i; - Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; - - /* Search (defined) internal symbols first. */ - for (i = 1; i < sechdrs[symindex].sh_size/sizeof(*sym); i++) { - if (sym[i].st_shndx != SHN_UNDEF - && strcmp(name, strtab + sym[i].st_name) == 0) - return sym[i].st_value; - } - return 0; -} - /* Search for module by name: must hold module_mutex. 
*/ static struct module *find_module(const char *name) { @@ -785,139 +767,6 @@ static struct module_attribute *modinfo_attrs[] = { NULL, }; -#ifdef CONFIG_OBSOLETE_MODPARM -/* Bounds checking done below */ -static int obsparm_copy_string(const char *val, struct kernel_param *kp) -{ - strcpy(kp->arg, val); - return 0; -} - -static int set_obsolete(const char *val, struct kernel_param *kp) -{ - unsigned int min, max; - unsigned int size, maxsize; - int dummy; - char *endp; - const char *p; - struct obsolete_modparm *obsparm = kp->arg; - - if (!val) { - printk(KERN_ERR "Parameter %s needs an argument\n", kp->name); - return -EINVAL; - } - - /* type is: [min[-max]]{b,h,i,l,s} */ - p = obsparm->type; - min = simple_strtol(p, &endp, 10); - if (endp == obsparm->type) - min = max = 1; - else if (*endp == '-') { - p = endp+1; - max = simple_strtol(p, &endp, 10); - } else - max = min; - switch (*endp) { - case 'b': - return param_array(kp->name, val, min, max, obsparm->addr, - 1, param_set_byte, &dummy); - case 'h': - return param_array(kp->name, val, min, max, obsparm->addr, - sizeof(short), param_set_short, &dummy); - case 'i': - return param_array(kp->name, val, min, max, obsparm->addr, - sizeof(int), param_set_int, &dummy); - case 'l': - return param_array(kp->name, val, min, max, obsparm->addr, - sizeof(long), param_set_long, &dummy); - case 's': - return param_array(kp->name, val, min, max, obsparm->addr, - sizeof(char *), param_set_charp, &dummy); - - case 'c': - /* Undocumented: 1-5c50 means 1-5 strings of up to 49 chars, - and the decl is "char xxx[5][50];" */ - p = endp+1; - maxsize = simple_strtol(p, &endp, 10); - /* We check lengths here (yes, this is a hack). */ - p = val; - while (p[size = strcspn(p, ",")]) { - if (size >= maxsize) - goto oversize; - p += size+1; - } - if (size >= maxsize) - goto oversize; - return param_array(kp->name, val, min, max, obsparm->addr, - maxsize, obsparm_copy_string, &dummy); - } - printk(KERN_ERR "Unknown obsolete parameter type %s\n", obsparm->type); - return -EINVAL; - oversize: - printk(KERN_ERR - "Parameter %s doesn't fit in %u chars.\n", kp->name, maxsize); - return -EINVAL; -} - -static int obsolete_params(const char *name, - char *args, - struct obsolete_modparm obsparm[], - unsigned int num, - Elf_Shdr *sechdrs, - unsigned int symindex, - const char *strtab) -{ - struct kernel_param *kp; - unsigned int i; - int ret; - - kp = kmalloc(sizeof(kp[0]) * num, GFP_KERNEL); - if (!kp) - return -ENOMEM; - - for (i = 0; i < num; i++) { - char sym_name[128 + sizeof(MODULE_SYMBOL_PREFIX)]; - - snprintf(sym_name, sizeof(sym_name), "%s%s", - MODULE_SYMBOL_PREFIX, obsparm[i].name); - - kp[i].name = obsparm[i].name; - kp[i].perm = 000; - kp[i].set = set_obsolete; - kp[i].get = NULL; - obsparm[i].addr - = (void *)find_local_symbol(sechdrs, symindex, strtab, - sym_name); - if (!obsparm[i].addr) { - printk("%s: falsely claims to have parameter %s\n", - name, obsparm[i].name); - ret = -EINVAL; - goto out; - } - kp[i].arg = &obsparm[i]; - } - - ret = parse_args(name, args, kp, num, NULL); - out: - kfree(kp); - return ret; -} -#else -static int obsolete_params(const char *name, - char *args, - struct obsolete_modparm obsparm[], - unsigned int num, - Elf_Shdr *sechdrs, - unsigned int symindex, - const char *strtab) -{ - if (num != 0) - printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", - name); - return 0; -} -#endif /* CONFIG_OBSOLETE_MODPARM */ - static const char vermagic[] = VERMAGIC_STRING; #ifdef CONFIG_MODVERSIONS @@ -1572,7 +1421,6 @@ static struct 
module *load_module(void __user *umod, exportindex, modindex, obsparmindex, infoindex, gplindex, crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex, gplfuturecrcindex; - long arglen; struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ @@ -1691,23 +1539,11 @@ static struct module *load_module(void __user *umod, } /* Now copy in args */ - arglen = strlen_user(uargs); - if (!arglen) { - err = -EFAULT; - goto free_hdr; - } - args = kmalloc(arglen, GFP_KERNEL); - if (!args) { - err = -ENOMEM; + args = strndup_user(uargs, ~0UL >> 1); + if (IS_ERR(args)) { + err = PTR_ERR(args); goto free_hdr; } - if (copy_from_user(args, uargs, arglen) != 0) { - err = -EFAULT; - goto free_mod; - } - - /* Userspace could have altered the string after the strlen_user() */ - args[arglen - 1] = '\0'; if (find_module(mod->name)) { err = -EEXIST; @@ -1887,27 +1723,17 @@ static struct module *load_module(void __user *umod, set_fs(old_fs); mod->args = args; - if (obsparmindex) { - err = obsolete_params(mod->name, mod->args, - (struct obsolete_modparm *) - sechdrs[obsparmindex].sh_addr, - sechdrs[obsparmindex].sh_size - / sizeof(struct obsolete_modparm), - sechdrs, symindex, - (char *)sechdrs[strindex].sh_addr); - if (setupindex) - printk(KERN_WARNING "%s: Ignoring new-style " - "parameters in presence of obsolete ones\n", - mod->name); - } else { - /* Size of section 0 is 0, so this works well if no params */ - err = parse_args(mod->name, mod->args, - (struct kernel_param *) - sechdrs[setupindex].sh_addr, - sechdrs[setupindex].sh_size - / sizeof(struct kernel_param), - NULL); - } + if (obsparmindex) + printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", + mod->name); + + /* Size of section 0 is 0, so this works well if no params */ + err = parse_args(mod->name, mod->args, + (struct kernel_param *) + sechdrs[setupindex].sh_addr, + sechdrs[setupindex].sh_size + / sizeof(struct kernel_param), + NULL); if (err < 0) goto arch_cleanup; diff --git a/kernel/params.c b/kernel/params.c index a2915058231..9de637a5c8b 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -265,12 +265,12 @@ int param_get_invbool(char *buffer, struct kernel_param *kp) } /* We cheat here and temporarily mangle the string. */ -int param_array(const char *name, - const char *val, - unsigned int min, unsigned int max, - void *elem, int elemsize, - int (*set)(const char *, struct kernel_param *kp), - int *num) +static int param_array(const char *name, + const char *val, + unsigned int min, unsigned int max, + void *elem, int elemsize, + int (*set)(const char *, struct kernel_param *kp), + int *num) { int ret; struct kernel_param kp; diff --git a/kernel/power/smp.c b/kernel/power/smp.c index 911fc62b822..5957312b2d6 100644 --- a/kernel/power/smp.c +++ b/kernel/power/smp.c @@ -49,9 +49,7 @@ void enable_nonboot_cpus(void) printk("Thawing cpus ...\n"); for_each_cpu_mask(cpu, frozen_cpus) { - error = smp_prepare_cpu(cpu); - if (!error) - error = cpu_up(cpu); + error = cpu_up(cpu); if (!error) { printk("CPU%d is up\n", cpu); continue; diff --git a/kernel/printk.c b/kernel/printk.c index 13ced0f7828..8cc19431e74 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -122,44 +122,6 @@ static char *log_buf = __log_buf; static int log_buf_len = __LOG_BUF_LEN; static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */ -/* - * Setup a list of consoles. 
Called from init/main.c - */ -static int __init console_setup(char *str) -{ - char name[sizeof(console_cmdline[0].name)]; - char *s, *options; - int idx; - - /* - * Decode str into name, index, options. - */ - if (str[0] >= '0' && str[0] <= '9') { - strcpy(name, "ttyS"); - strncpy(name + 4, str, sizeof(name) - 5); - } else - strncpy(name, str, sizeof(name) - 1); - name[sizeof(name) - 1] = 0; - if ((options = strchr(str, ',')) != NULL) - *(options++) = 0; -#ifdef __sparc__ - if (!strcmp(str, "ttya")) - strcpy(name, "ttyS0"); - if (!strcmp(str, "ttyb")) - strcpy(name, "ttyS1"); -#endif - for (s = name; *s; s++) - if ((*s >= '0' && *s <= '9') || *s == ',') - break; - idx = simple_strtoul(s, NULL, 10); - *s = 0; - - add_preferred_console(name, idx, options); - return 1; -} - -__setup("console=", console_setup); - static int __init log_buf_len_setup(char *str) { unsigned long size = memparse(str, &str); @@ -659,6 +621,44 @@ static void call_console_drivers(unsigned long start, unsigned long end) #endif +/* + * Set up a list of consoles. Called from init/main.c + */ +static int __init console_setup(char *str) +{ + char name[sizeof(console_cmdline[0].name)]; + char *s, *options; + int idx; + + /* + * Decode str into name, index, options. + */ + if (str[0] >= '0' && str[0] <= '9') { + strcpy(name, "ttyS"); + strncpy(name + 4, str, sizeof(name) - 5); + } else { + strncpy(name, str, sizeof(name) - 1); + } + name[sizeof(name) - 1] = 0; + if ((options = strchr(str, ',')) != NULL) + *(options++) = 0; +#ifdef __sparc__ + if (!strcmp(str, "ttya")) + strcpy(name, "ttyS0"); + if (!strcmp(str, "ttyb")) + strcpy(name, "ttyS1"); +#endif + for (s = name; *s; s++) + if ((*s >= '0' && *s <= '9') || *s == ',') + break; + idx = simple_strtoul(s, NULL, 10); + *s = 0; + + add_preferred_console(name, idx, options); + return 1; +} +__setup("console=", console_setup); + /** * add_preferred_console - add a device to the list of preferred consoles. * @name: device name diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 6df1559b1c0..13458bbaa1b 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -416,8 +416,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, rdp->curtail = &rdp->curlist; } - local_irq_disable(); if (rdp->nxtlist && !rdp->curlist) { + local_irq_disable(); rdp->curlist = rdp->nxtlist; rdp->curtail = rdp->nxttail; rdp->nxtlist = NULL; @@ -442,9 +442,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, rcu_start_batch(rcp); spin_unlock(&rcp->lock); } - } else { - local_irq_enable(); } + rcu_check_quiescent_state(rcp, rdp); if (rdp->donelist) rcu_do_batch(rdp); diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 7712912dbc8..b4b362b5baf 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -54,15 +54,15 @@ static int verbose; /* Print more debug info. */ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. 
*/ static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/ -MODULE_PARM(nreaders, "i"); +module_param(nreaders, int, 0); MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); -MODULE_PARM(stat_interval, "i"); +module_param(stat_interval, int, 0); MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); -MODULE_PARM(verbose, "i"); +module_param(verbose, bool, 0); MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); -MODULE_PARM(test_no_idle_hz, "i"); +module_param(test_no_idle_hz, bool, 0); MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); -MODULE_PARM(shuffle_interval, "i"); +module_param(shuffle_interval, int, 0); MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); #define TORTURE_FLAG "rcutorture: " #define PRINTK_STRING(s) \ @@ -441,6 +441,16 @@ rcu_torture_shuffle(void *arg) return 0; } +static inline void +rcu_torture_print_module_parms(char *tag) +{ + printk(KERN_ALERT TORTURE_FLAG "--- %s: nreaders=%d " + "stat_interval=%d verbose=%d test_no_idle_hz=%d " + "shuffle_interval = %d\n", + tag, nrealreaders, stat_interval, verbose, test_no_idle_hz, + shuffle_interval); +} + static void rcu_torture_cleanup(void) { @@ -483,9 +493,10 @@ rcu_torture_cleanup(void) rcu_barrier(); rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ - printk(KERN_ALERT TORTURE_FLAG - "--- End of test: %s\n", - atomic_read(&n_rcu_torture_error) == 0 ? "SUCCESS" : "FAILURE"); + if (atomic_read(&n_rcu_torture_error)) + rcu_torture_print_module_parms("End of test: FAILURE"); + else + rcu_torture_print_module_parms("End of test: SUCCESS"); } static int @@ -501,11 +512,7 @@ rcu_torture_init(void) nrealreaders = nreaders; else nrealreaders = 2 * num_online_cpus(); - printk(KERN_ALERT TORTURE_FLAG "--- Start of test: nreaders=%d " - "stat_interval=%d verbose=%d test_no_idle_hz=%d " - "shuffle_interval = %d\n", - nrealreaders, stat_interval, verbose, test_no_idle_hz, - shuffle_interval); + rcu_torture_print_module_parms("Start of test"); fullstop = 0; /* Set up the freelist. */ diff --git a/kernel/softlockup.c b/kernel/softlockup.c index c67189a25d5..d9b3d5847ed 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -1,12 +1,11 @@ /* * Detect Soft Lockups * - * started by Ingo Molnar, (C) 2005, Red Hat + * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc. * * this code detects soft lockups: incidents in where on a CPU * the kernel does not reschedule for 10 seconds or more. 
*/ - #include <linux/mm.h> #include <linux/cpu.h> #include <linux/init.h> @@ -17,13 +16,14 @@ static DEFINE_SPINLOCK(print_lock); -static DEFINE_PER_CPU(unsigned long, timestamp) = 0; -static DEFINE_PER_CPU(unsigned long, print_timestamp) = 0; +static DEFINE_PER_CPU(unsigned long, touch_timestamp); +static DEFINE_PER_CPU(unsigned long, print_timestamp); static DEFINE_PER_CPU(struct task_struct *, watchdog_task); static int did_panic = 0; -static int softlock_panic(struct notifier_block *this, unsigned long event, - void *ptr) + +static int +softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) { did_panic = 1; @@ -36,7 +36,7 @@ static struct notifier_block panic_block = { void touch_softlockup_watchdog(void) { - per_cpu(timestamp, raw_smp_processor_id()) = jiffies; + per_cpu(touch_timestamp, raw_smp_processor_id()) = jiffies; } EXPORT_SYMBOL(touch_softlockup_watchdog); @@ -44,25 +44,35 @@ EXPORT_SYMBOL(touch_softlockup_watchdog); * This callback runs from the timer interrupt, and checks * whether the watchdog thread has hung or not: */ -void softlockup_tick(struct pt_regs *regs) +void softlockup_tick(void) { int this_cpu = smp_processor_id(); - unsigned long timestamp = per_cpu(timestamp, this_cpu); + unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu); - if (per_cpu(print_timestamp, this_cpu) == timestamp) + /* prevent double reports: */ + if (per_cpu(print_timestamp, this_cpu) == touch_timestamp || + did_panic || + !per_cpu(watchdog_task, this_cpu)) return; - /* Do not cause a second panic when there already was one */ - if (did_panic) + /* do not print during early bootup: */ + if (unlikely(system_state != SYSTEM_RUNNING)) { + touch_softlockup_watchdog(); return; + } - if (time_after(jiffies, timestamp + 10*HZ)) { - per_cpu(print_timestamp, this_cpu) = timestamp; + /* Wake up the high-prio watchdog task every second: */ + if (time_after(jiffies, touch_timestamp + HZ)) + wake_up_process(per_cpu(watchdog_task, this_cpu)); + + /* Warn about unreasonable 10+ seconds delays: */ + if (time_after(jiffies, touch_timestamp + 10*HZ)) { + per_cpu(print_timestamp, this_cpu) = touch_timestamp; spin_lock(&print_lock); printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n", this_cpu); - show_regs(regs); + dump_stack(); spin_unlock(&print_lock); } } @@ -77,18 +87,16 @@ static int watchdog(void * __bind_cpu) sched_setscheduler(current, SCHED_FIFO, ¶m); current->flags |= PF_NOFREEZE; - set_current_state(TASK_INTERRUPTIBLE); - /* - * Run briefly once per second - if this gets delayed for - * more than 10 seconds then the debug-printout triggers - * in softlockup_tick(): + * Run briefly once per second to reset the softlockup timestamp. + * If this gets delayed for more than 10 seconds then the + * debug-printout triggers in softlockup_tick(). 
*/ while (!kthread_should_stop()) { - msleep_interruptible(1000); + set_current_state(TASK_INTERRUPTIBLE); touch_softlockup_watchdog(); + schedule(); } - __set_current_state(TASK_RUNNING); return 0; } @@ -110,11 +118,11 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) printk("watchdog for %i failed\n", hotcpu); return NOTIFY_BAD; } + per_cpu(touch_timestamp, hotcpu) = jiffies; per_cpu(watchdog_task, hotcpu) = p; kthread_bind(p, hotcpu); break; case CPU_ONLINE: - wake_up_process(per_cpu(watchdog_task, hotcpu)); break; #ifdef CONFIG_HOTPLUG_CPU @@ -146,4 +154,3 @@ __init void spawn_softlockup_task(void) notifier_chain_register(&panic_notifier_list, &panic_block); } - diff --git a/kernel/sys.c b/kernel/sys.c index c0fcad9f826..38bc73ede2b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -224,18 +224,6 @@ int unregister_reboot_notifier(struct notifier_block * nb) EXPORT_SYMBOL(unregister_reboot_notifier); -#ifndef CONFIG_SECURITY -int capable(int cap) -{ - if (cap_raised(current->cap_effective, cap)) { - current->flags |= PF_SUPERPRIV; - return 1; - } - return 0; -} -EXPORT_SYMBOL(capable); -#endif - static int set_one_prio(struct task_struct *p, int niceval, int error) { int no_nice; @@ -1375,7 +1363,7 @@ static void groups_sort(struct group_info *group_info) /* a simple bsearch */ int groups_search(struct group_info *group_info, gid_t grp) { - int left, right; + unsigned int left, right; if (!group_info) return 0; @@ -1383,7 +1371,7 @@ int groups_search(struct group_info *group_info, gid_t grp) left = 0; right = group_info->ngroups; while (left < right) { - int mid = (left+right)/2; + unsigned int mid = (left+right)/2; int cmp = grp - GROUP_AT(group_info, mid); if (cmp > 0) left = mid + 1; @@ -1433,7 +1421,6 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) return -EINVAL; /* no need to grab task_lock here; it cannot change */ - get_group_info(current->group_info); i = current->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { @@ -1446,7 +1433,6 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) } } out: - put_group_info(current->group_info); return i; } @@ -1487,9 +1473,7 @@ int in_group_p(gid_t grp) { int retval = 1; if (grp != current->fsgid) { - get_group_info(current->group_info); retval = groups_search(current->group_info, grp); - put_group_info(current->group_info); } return retval; } @@ -1500,9 +1484,7 @@ int in_egroup_p(gid_t grp) { int retval = 1; if (grp != current->egid) { - get_group_info(current->group_info); retval = groups_search(current->group_info, grp); - put_group_info(current->group_info); } return retval; } @@ -1630,20 +1612,21 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) { struct rlimit new_rlim, *old_rlim; + unsigned long it_prof_secs; int retval; if (resource >= RLIM_NLIMITS) return -EINVAL; - if(copy_from_user(&new_rlim, rlim, sizeof(*rlim))) + if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) return -EFAULT; - if (new_rlim.rlim_cur > new_rlim.rlim_max) - return -EINVAL; + if (new_rlim.rlim_cur > new_rlim.rlim_max) + return -EINVAL; old_rlim = current->signal->rlim + resource; if ((new_rlim.rlim_max > old_rlim->rlim_max) && !capable(CAP_SYS_RESOURCE)) return -EPERM; if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) - return -EPERM; + return -EPERM; retval = security_task_setrlimit(resource, &new_rlim); if (retval) @@ -1653,19 +1636,40 @@ asmlinkage 
long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) *old_rlim = new_rlim; task_unlock(current->group_leader); - if (resource == RLIMIT_CPU && new_rlim.rlim_cur != RLIM_INFINITY && - (cputime_eq(current->signal->it_prof_expires, cputime_zero) || - new_rlim.rlim_cur <= cputime_to_secs( - current->signal->it_prof_expires))) { - cputime_t cputime = secs_to_cputime(new_rlim.rlim_cur); + if (resource != RLIMIT_CPU) + goto out; + + /* + * RLIMIT_CPU handling. Note that the kernel fails to return an error + * code if it rejected the user's attempt to set RLIMIT_CPU. This is a + * very long-standing error, and fixing it now risks breakage of + * applications, so we live with it + */ + if (new_rlim.rlim_cur == RLIM_INFINITY) + goto out; + + it_prof_secs = cputime_to_secs(current->signal->it_prof_expires); + if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) { + unsigned long rlim_cur = new_rlim.rlim_cur; + cputime_t cputime; + + if (rlim_cur == 0) { + /* + * The caller is asking for an immediate RLIMIT_CPU + * expiry. But we use the zero value to mean "it was + * never set". So let's cheat and make it one second + * instead + */ + rlim_cur = 1; + } + cputime = secs_to_cputime(rlim_cur); read_lock(&tasklist_lock); spin_lock_irq(¤t->sighand->siglock); - set_process_cpu_timer(current, CPUCLOCK_PROF, - &cputime, NULL); + set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); spin_unlock_irq(¤t->sighand->siglock); read_unlock(&tasklist_lock); } - +out: return 0; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 32b48e8ee36..e82726faeef 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -742,18 +742,18 @@ static ctl_table vm_table[] = { { .ctl_name = VM_DIRTY_WB_CS, .procname = "dirty_writeback_centisecs", - .data = &dirty_writeback_centisecs, - .maxlen = sizeof(dirty_writeback_centisecs), + .data = &dirty_writeback_interval, + .maxlen = sizeof(dirty_writeback_interval), .mode = 0644, .proc_handler = &dirty_writeback_centisecs_handler, }, { .ctl_name = VM_DIRTY_EXPIRE_CS, .procname = "dirty_expire_centisecs", - .data = &dirty_expire_centisecs, - .maxlen = sizeof(dirty_expire_centisecs), + .data = &dirty_expire_interval, + .maxlen = sizeof(dirty_expire_interval), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_dointvec_userhz_jiffies, }, { .ctl_name = VM_NR_PDFLUSH_THREADS, @@ -848,9 +848,8 @@ static ctl_table vm_table[] = { .data = &laptop_mode, .maxlen = sizeof(laptop_mode), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, - .extra1 = &zero, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, }, { .ctl_name = VM_BLOCK_DUMP, @@ -2054,6 +2053,8 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, int write, void *data) { if (write) { + if (*lvalp > LONG_MAX / HZ) + return 1; *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ); } else { int val = *valp; @@ -2075,6 +2076,8 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, int write, void *data) { if (write) { + if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ) + return 1; *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp); } else { int val = *valp; diff --git a/kernel/time.c b/kernel/time.c index 804539165d8..e00a97b7724 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -202,24 +202,6 @@ asmlinkage long sys_settimeofday(struct timeval __user *tv, return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? 
&new_tz : NULL); } -long pps_offset; /* pps time offset (us) */ -long pps_jitter = MAXTIME; /* time dispersion (jitter) (us) */ - -long pps_freq; /* frequency offset (scaled ppm) */ -long pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */ - -long pps_valid = PPS_VALID; /* pps signal watchdog counter */ - -int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */ - -long pps_jitcnt; /* jitter limit exceeded */ -long pps_calcnt; /* calibration intervals */ -long pps_errcnt; /* calibration errors */ -long pps_stbcnt; /* stability limit exceeded */ - -/* hook for a loadable hardpps kernel module */ -void (*hardpps_ptr)(struct timeval *); - /* we call this to notify the arch when the clock is being * controlled. If no such arch routine, do nothing. */ @@ -279,7 +261,7 @@ int do_adjtimex(struct timex *txc) result = -EINVAL; goto leave; } - time_freq = txc->freq - pps_freq; + time_freq = txc->freq; } if (txc->modes & ADJ_MAXERROR) { @@ -312,10 +294,8 @@ int do_adjtimex(struct timex *txc) if ((time_next_adjust = txc->offset) == 0) time_adjust = 0; } - else if ( time_status & (STA_PLL | STA_PPSTIME) ) { - ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) == - (STA_PPSTIME | STA_PPSSIGNAL) ? - pps_offset : txc->offset; + else if (time_status & STA_PLL) { + ltemp = txc->offset; /* * Scale the phase adjustment and @@ -356,23 +336,14 @@ int do_adjtimex(struct timex *txc) } time_freq = min(time_freq, time_tolerance); time_freq = max(time_freq, -time_tolerance); - } /* STA_PLL || STA_PPSTIME */ + } /* STA_PLL */ } /* txc->modes & ADJ_OFFSET */ if (txc->modes & ADJ_TICK) { tick_usec = txc->tick; tick_nsec = TICK_USEC_TO_NSEC(tick_usec); } } /* txc->modes */ -leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0 - || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0 - && (time_status & STA_PPSSIGNAL) == 0) - /* p. 24, (b) */ - || ((time_status & (STA_PPSTIME|STA_PPSJITTER)) - == (STA_PPSTIME|STA_PPSJITTER)) - /* p. 24, (c) */ - || ((time_status & STA_PPSFREQ) != 0 - && (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0)) - /* p. 
24, (d) */ +leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) result = TIME_ERROR; if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) @@ -380,7 +351,7 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0 else { txc->offset = shift_right(time_offset, SHIFT_UPDATE); } - txc->freq = time_freq + pps_freq; + txc->freq = time_freq; txc->maxerror = time_maxerror; txc->esterror = time_esterror; txc->status = time_status; @@ -388,14 +359,16 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0 txc->precision = time_precision; txc->tolerance = time_tolerance; txc->tick = tick_usec; - txc->ppsfreq = pps_freq; - txc->jitter = pps_jitter >> PPS_AVG; - txc->shift = pps_shift; - txc->stabil = pps_stabil; - txc->jitcnt = pps_jitcnt; - txc->calcnt = pps_calcnt; - txc->errcnt = pps_errcnt; - txc->stbcnt = pps_stbcnt; + + /* PPS is not implemented, so these are zero */ + txc->ppsfreq = 0; + txc->jitter = 0; + txc->shift = 0; + txc->stabil = 0; + txc->jitcnt = 0; + txc->calcnt = 0; + txc->errcnt = 0; + txc->stbcnt = 0; write_sequnlock_irq(&xtime_lock); do_gettimeofday(&txc->time); notify_arch_cmos_timer(); diff --git a/kernel/timer.c b/kernel/timer.c index 2410c18dbeb..ab189dd187c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -86,7 +86,8 @@ struct tvec_t_base_s { } ____cacheline_aligned_in_smp; typedef struct tvec_t_base_s tvec_base_t; -static DEFINE_PER_CPU(tvec_base_t, tvec_bases); +static DEFINE_PER_CPU(tvec_base_t *, tvec_bases); +static tvec_base_t boot_tvec_bases; static inline void set_running_timer(tvec_base_t *base, struct timer_list *timer) @@ -157,7 +158,7 @@ EXPORT_SYMBOL(__init_timer_base); void fastcall init_timer(struct timer_list *timer) { timer->entry.next = NULL; - timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base; + timer->base = &per_cpu(tvec_bases, raw_smp_processor_id())->t_base; } EXPORT_SYMBOL(init_timer); @@ -218,7 +219,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) ret = 1; } - new_base = &__get_cpu_var(tvec_bases); + new_base = __get_cpu_var(tvec_bases); if (base != &new_base->t_base) { /* @@ -258,7 +259,7 @@ EXPORT_SYMBOL(__mod_timer); */ void add_timer_on(struct timer_list *timer, int cpu) { - tvec_base_t *base = &per_cpu(tvec_bases, cpu); + tvec_base_t *base = per_cpu(tvec_bases, cpu); unsigned long flags; BUG_ON(timer_pending(timer) || !timer->function); @@ -504,7 +505,7 @@ unsigned long next_timer_interrupt(void) } hr_expires += jiffies; - base = &__get_cpu_var(tvec_bases); + base = __get_cpu_var(tvec_bases); spin_lock(&base->t_base.lock); expires = base->timer_jiffies + (LONG_MAX >> 1); list = NULL; @@ -696,18 +697,9 @@ static void second_overflow(void) /* * Compute the frequency estimate and additional phase adjustment due - * to frequency error for the next second. When the PPS signal is - * engaged, gnaw on the watchdog counter and update the frequency - * computed by the pll and the PPS signal. + * to frequency error for the next second. 
*/ - pps_valid++; - if (pps_valid == PPS_VALID) { /* PPS signal lost */ - pps_jitter = MAXTIME; - pps_stabil = MAXFREQ; - time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER | - STA_PPSWANDER | STA_PPSERROR); - } - ltemp = time_freq + pps_freq; + ltemp = time_freq; time_adj += shift_right(ltemp,(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE)); #if HZ == 100 @@ -901,7 +893,7 @@ EXPORT_SYMBOL(xtime_lock); */ static void run_timer_softirq(struct softirq_action *h) { - tvec_base_t *base = &__get_cpu_var(tvec_bases); + tvec_base_t *base = __get_cpu_var(tvec_bases); hrtimer_run_queues(); if (time_after_eq(jiffies, base->timer_jiffies)) @@ -914,6 +906,7 @@ static void run_timer_softirq(struct softirq_action *h) void run_local_timers(void) { raise_softirq(TIMER_SOFTIRQ); + softlockup_tick(); } /* @@ -944,7 +937,6 @@ void do_timer(struct pt_regs *regs) /* prevent loading jiffies before storing new jiffies_64 value. */ barrier(); update_times(); - softlockup_tick(regs); } #ifdef __ARCH_WANT_SYS_ALARM @@ -955,19 +947,7 @@ void do_timer(struct pt_regs *regs) */ asmlinkage unsigned long sys_alarm(unsigned int seconds) { - struct itimerval it_new, it_old; - unsigned int oldalarm; - - it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; - it_new.it_value.tv_sec = seconds; - it_new.it_value.tv_usec = 0; - do_setitimer(ITIMER_REAL, &it_new, &it_old); - oldalarm = it_old.it_value.tv_sec; - /* ehhh.. We can't return 0 if we have an alarm pending.. */ - /* And we'd better return too much than too little anyway */ - if ((!oldalarm && it_old.it_value.tv_usec) || it_old.it_value.tv_usec >= 500000) - oldalarm++; - return oldalarm; + return alarm_setitimer(seconds); } #endif @@ -1256,12 +1236,32 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) return 0; } -static void __devinit init_timers_cpu(int cpu) +static int __devinit init_timers_cpu(int cpu) { int j; tvec_base_t *base; - base = &per_cpu(tvec_bases, cpu); + base = per_cpu(tvec_bases, cpu); + if (!base) { + static char boot_done; + + /* + * Cannot do allocation in init_timers as that runs before the + * allocator initializes (and would waste memory if there are + * more possible CPUs than will ever be installed/brought up). 
+ */ + if (boot_done) { + base = kmalloc_node(sizeof(*base), GFP_KERNEL, + cpu_to_node(cpu)); + if (!base) + return -ENOMEM; + memset(base, 0, sizeof(*base)); + } else { + base = &boot_tvec_bases; + boot_done = 1; + } + per_cpu(tvec_bases, cpu) = base; + } spin_lock_init(&base->t_base.lock); for (j = 0; j < TVN_SIZE; j++) { INIT_LIST_HEAD(base->tv5.vec + j); @@ -1273,6 +1273,7 @@ static void __devinit init_timers_cpu(int cpu) INIT_LIST_HEAD(base->tv1.vec + j); base->timer_jiffies = jiffies; + return 0; } #ifdef CONFIG_HOTPLUG_CPU @@ -1295,8 +1296,8 @@ static void __devinit migrate_timers(int cpu) int i; BUG_ON(cpu_online(cpu)); - old_base = &per_cpu(tvec_bases, cpu); - new_base = &get_cpu_var(tvec_bases); + old_base = per_cpu(tvec_bases, cpu); + new_base = get_cpu_var(tvec_bases); local_irq_disable(); spin_lock(&new_base->t_base.lock); @@ -1326,7 +1327,8 @@ static int __devinit timer_cpu_notify(struct notifier_block *self, long cpu = (long)hcpu; switch(action) { case CPU_UP_PREPARE: - init_timers_cpu(cpu); + if (init_timers_cpu(cpu) < 0) + return NOTIFY_BAD; break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: diff --git a/kernel/user.c b/kernel/user.c index d9deae43a9a..2116642f42c 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -105,15 +105,19 @@ void free_uid(struct user_struct *up) { unsigned long flags; + if (!up) + return; + local_irq_save(flags); - if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) { + if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) { uid_hash_remove(up); + spin_unlock_irqrestore(&uidhash_lock, flags); key_put(up->uid_keyring); key_put(up->session_keyring); kmem_cache_free(uid_cachep, up); - spin_unlock(&uidhash_lock); + } else { + local_irq_restore(flags); } - local_irq_restore(flags); } struct user_struct * alloc_uid(uid_t uid) |
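
The kernel/itimer.c hunk above factors the old sys_alarm() logic into alarm_setitimer(), including its rounding of the previously pending timer. A standalone sketch of just that rounding rule, with hypothetical struct and function names (the real code works on the struct timeval inside struct itimerval):

#include <stdio.h>

struct old_timer {
	long tv_sec;
	long tv_usec;
};

static unsigned int remaining_seconds(struct old_timer old)
{
	/*
	 * Never report 0 while a timer is still pending, and round half a
	 * second or more up to the next full second ("better too much than
	 * too little").
	 */
	if ((!old.tv_sec && old.tv_usec) || old.tv_usec >= 500000)
		old.tv_sec++;
	return (unsigned int)old.tv_sec;
}

int main(void)
{
	struct old_timer a = { 0, 300000 };	/* 0.3s left -> reports 1 */
	struct old_timer b = { 4, 700000 };	/* 4.7s left -> reports 5 */
	struct old_timer c = { 4, 200000 };	/* 4.2s left -> reports 4 */

	printf("%u %u %u\n", remaining_seconds(a),
	       remaining_seconds(b), remaining_seconds(c));
	return 0;
}
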
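Similarly, the kernel/timer.c change above turns the per-CPU tvec_base_t into a pointer that is allocated on demand, with a static boot_tvec_bases used for the boot CPU because the slab allocator is not yet available when the first timer base is needed. A generic sketch of that bootstrap-then-allocate pattern, assuming toy names (struct base, bases[], get_base(), NR_CPUS) rather than the kernel's:

#include <stdlib.h>

#define NR_CPUS 4

struct base {
	int initialized;
	/* ... timer wheels would live here ... */
};

static struct base boot_base;          /* usable before the allocator is up */
static struct base *bases[NR_CPUS];

static struct base *get_base(int cpu)
{
	static int boot_done;

	if (!bases[cpu]) {
		if (boot_done) {
			/* allocator is up: allocate a zeroed base per CPU */
			struct base *b = calloc(1, sizeof(*b));
			if (!b)
				return NULL;   /* caller maps this to -ENOMEM */
			bases[cpu] = b;
		} else {
			/* first (boot) CPU gets the static object */
			bases[cpu] = &boot_base;
			boot_done = 1;
		}
	}
	return bases[cpu];
}

int main(void)
{
	return (get_base(0) && get_base(1)) ? 0 : 1;
}

This avoids statically reserving one full timer base per possible CPU while still letting the boot CPU initialize its timers before kmalloc_node() works, which is why init_timers_cpu() above now returns an error code and CPU_UP_PREPARE can fail with NOTIFY_BAD.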