summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorAnton Altaparmakov <aia21@cantab.net>2005-06-25 14:27:27 +0100
committerAnton Altaparmakov <aia21@cantab.net>2005-06-25 14:27:27 +0100
commit38b22b6e9f46ab8f73ef5734f0e0a000766a9258 (patch)
tree2ccc41ef55918d3af43e444bde7648562a031559 /kernel
parent3357d4c75f1fb67e7304998c4ad4e9a9fed66fa4 (diff)
parentb3e112bcc19abd8e9657dca34a87316786e096f3 (diff)
Automerge with /usr/src/ntfs-2.6.git.
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.hz46
-rw-r--r--kernel/cpuset.c8
-rw-r--r--kernel/exit.c16
-rw-r--r--kernel/irq/spurious.c2
-rw-r--r--kernel/kmod.c17
-rw-r--r--kernel/kprobes.c288
-rw-r--r--kernel/module.c97
-rw-r--r--kernel/posix-timers.c34
-rw-r--r--kernel/power/swsusp.c2
-rw-r--r--kernel/printk.c12
-rw-r--r--kernel/sched.c4
-rw-r--r--kernel/signal.c1
-rw-r--r--kernel/sys.c110
-rw-r--r--kernel/sysctl.c9
-rw-r--r--kernel/timer.c351
15 files changed, 718 insertions, 279 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
new file mode 100644
index 00000000000..248e1c396f8
--- /dev/null
+++ b/kernel/Kconfig.hz
@@ -0,0 +1,46 @@
+#
+# Timer Interrupt Frequency Configuration
+#
+
+choice
+ prompt "Timer frequency"
+ default HZ_250
+ help
+ Allows the configuration of the timer frequency. It is customary
+ to have the timer interrupt run at 1000 HZ but 100 HZ may be more
+ beneficial for servers and NUMA systems that do not need to have
+ a fast response for user interaction and that may experience bus
+ contention and cacheline bounces as a result of timer interrupts.
+ Note that the timer interrupt occurs on each processor in an SMP
+ environment leading to NR_CPUS * HZ number of timer interrupts
+ per second.
+
+
+ config HZ_100
+ bool "100 HZ"
+ help
+ 100 HZ is a typical choice for servers, SMP and NUMA systems
+ with lots of processors that may show reduced performance if
+ too many timer interrupts are occurring.
+
+ config HZ_250
+ bool "250 HZ"
+ help
+ 250 HZ is a good compromise choice allowing server performance
+ while also showing good interactive responsiveness even
+ on SMP and NUMA systems.
+
+ config HZ_1000
+ bool "1000 HZ"
+ help
+ 1000 HZ is the preferred choice for desktop systems and other
+ systems requiring fast interactive responses to events.
+
+endchoice
+
+config HZ
+ int
+ default 100 if HZ_100
+ default 250 if HZ_250
+ default 1000 if HZ_1000
+
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 00e8f257551..79dd929f408 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -228,13 +228,7 @@ static struct dentry_operations cpuset_dops = {
static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name)
{
- struct qstr qstr;
- struct dentry *d;
-
- qstr.name = name;
- qstr.len = strlen(name);
- qstr.hash = full_name_hash(name, qstr.len);
- d = lookup_hash(&qstr, parent);
+ struct dentry *d = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(d))
d->d_op = &cpuset_dops;
return d;
diff --git a/kernel/exit.c b/kernel/exit.c
index 2ef2ad54020..3ebcd60a19c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,6 +72,11 @@ repeat:
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
__exit_signal(p);
__exit_sighand(p);
+ /*
+ * Note that the fastpath in sys_times depends on __exit_signal having
+ * updated the counters before a task is removed from the tasklist of
+ * the process by __unhash_process.
+ */
__unhash_process(p);
/*
@@ -793,6 +798,17 @@ fastcall NORET_TYPE void do_exit(long code)
ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
}
+ /*
+ * We're taking recursive faults here in do_exit. Safest is to just
+ * leave this task alone and wait for reboot.
+ */
+ if (unlikely(tsk->flags & PF_EXITING)) {
+ printk(KERN_ALERT
+ "Fixing recursive fault but reboot is needed!\n");
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule();
+ }
+
tsk->flags |= PF_EXITING;
/*
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index f6297c30690..ba039e827d5 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -45,7 +45,7 @@ __report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
}
}
-void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
+static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
{
static int count = 100;
diff --git a/kernel/kmod.c b/kernel/kmod.c
index eed53d4f523..44166e3bb8a 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -120,6 +120,7 @@ struct subprocess_info {
char *path;
char **argv;
char **envp;
+ struct key *ring;
int wait;
int retval;
};
@@ -130,16 +131,21 @@ struct subprocess_info {
static int ____call_usermodehelper(void *data)
{
struct subprocess_info *sub_info = data;
+ struct key *old_session;
int retval;
- /* Unblock all signals. */
+ /* Unblock all signals and set the session keyring. */
+ key_get(sub_info->ring);
flush_signals(current);
spin_lock_irq(&current->sighand->siglock);
+ old_session = __install_session_keyring(current, sub_info->ring);
flush_signal_handlers(current, 1);
sigemptyset(&current->blocked);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
+ key_put(old_session);
+
/* We can run anywhere, unlike our parent keventd(). */
set_cpus_allowed(current, CPU_MASK_ALL);
@@ -211,10 +217,11 @@ static void __call_usermodehelper(void *data)
}
/**
- * call_usermodehelper - start a usermode application
+ * call_usermodehelper_keys - start a usermode application
* @path: pathname for the application
* @argv: null-terminated argument list
* @envp: null-terminated environment list
+ * @session_keyring: session keyring for process (NULL for an empty keyring)
* @wait: wait for the application to finish and return status.
*
* Runs a user-space application. The application is started
@@ -224,7 +231,8 @@ static void __call_usermodehelper(void *data)
* Must be called from process context. Returns a negative error code
* if program was not execed successfully, or 0.
*/
-int call_usermodehelper(char *path, char **argv, char **envp, int wait)
+int call_usermodehelper_keys(char *path, char **argv, char **envp,
+ struct key *session_keyring, int wait)
{
DECLARE_COMPLETION(done);
struct subprocess_info sub_info = {
@@ -232,6 +240,7 @@ int call_usermodehelper(char *path, char **argv, char **envp, int wait)
.path = path,
.argv = argv,
.envp = envp,
+ .ring = session_keyring,
.wait = wait,
.retval = 0,
};
@@ -247,7 +256,7 @@ int call_usermodehelper(char *path, char **argv, char **envp, int wait)
wait_for_completion(&done);
return sub_info.retval;
}
-EXPORT_SYMBOL(call_usermodehelper);
+EXPORT_SYMBOL(call_usermodehelper_keys);
void __init usermodehelper_init(void)
{
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 037142b72a4..334f37472c5 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -27,6 +27,9 @@
* interface to access function arguments.
* 2004-Sep Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
* exceptions notifier to be first on the priority list.
+ * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston
+ * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
+ * <prasanna@in.ibm.com> added function-return probes.
*/
#include <linux/kprobes.h>
#include <linux/spinlock.h>
@@ -41,6 +44,7 @@
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
+static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
unsigned int kprobe_cpu = NR_CPUS;
static DEFINE_SPINLOCK(kprobe_lock);
@@ -78,22 +82,23 @@ struct kprobe *get_kprobe(void *addr)
* Aggregate handlers for multiple kprobes support - these handlers
* take care of invoking the individual kprobe handlers on p->list
*/
-int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
+static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe *kp;
list_for_each_entry(kp, &p->list, list) {
if (kp->pre_handler) {
curr_kprobe = kp;
- kp->pre_handler(kp, regs);
- curr_kprobe = NULL;
+ if (kp->pre_handler(kp, regs))
+ return 1;
}
+ curr_kprobe = NULL;
}
return 0;
}
-void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
- unsigned long flags)
+static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
+ unsigned long flags)
{
struct kprobe *kp;
@@ -107,7 +112,8 @@ void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
return;
}
-int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr)
+static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
+ int trapnr)
{
/*
* if we faulted "during" the execution of a user specified
@@ -120,19 +126,191 @@ int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr)
return 0;
}
+static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe *kp = curr_kprobe;
+ if (curr_kprobe && kp->break_handler) {
+ if (kp->break_handler(kp, regs)) {
+ curr_kprobe = NULL;
+ return 1;
+ }
+ }
+ curr_kprobe = NULL;
+ return 0;
+}
+
+struct kprobe trampoline_p = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler,
+ .post_handler = trampoline_post_handler
+};
+
+struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp)
+{
+ struct hlist_node *node;
+ struct kretprobe_instance *ri;
+ hlist_for_each_entry(ri, node, &rp->free_instances, uflist)
+ return ri;
+ return NULL;
+}
+
+static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp)
+{
+ struct hlist_node *node;
+ struct kretprobe_instance *ri;
+ hlist_for_each_entry(ri, node, &rp->used_instances, uflist)
+ return ri;
+ return NULL;
+}
+
+struct kretprobe_instance *get_rp_inst(void *sara)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct task_struct *tsk;
+ struct kretprobe_instance *ri;
+
+ tsk = arch_get_kprobe_task(sara);
+ head = &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
+ hlist_for_each_entry(ri, node, head, hlist) {
+ if (ri->stack_addr == sara)
+ return ri;
+ }
+ return NULL;
+}
+
+void add_rp_inst(struct kretprobe_instance *ri)
+{
+ struct task_struct *tsk;
+ /*
+ * Remove rp inst off the free list -
+ * Add it back when probed function returns
+ */
+ hlist_del(&ri->uflist);
+ tsk = arch_get_kprobe_task(ri->stack_addr);
+ /* Add rp inst onto table */
+ INIT_HLIST_NODE(&ri->hlist);
+ hlist_add_head(&ri->hlist,
+ &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]);
+
+ /* Also add this rp inst to the used list. */
+ INIT_HLIST_NODE(&ri->uflist);
+ hlist_add_head(&ri->uflist, &ri->rp->used_instances);
+}
+
+void recycle_rp_inst(struct kretprobe_instance *ri)
+{
+ /* remove rp inst off the rprobe_inst_table */
+ hlist_del(&ri->hlist);
+ if (ri->rp) {
+ /* remove rp inst off the used list */
+ hlist_del(&ri->uflist);
+ /* put rp inst back onto the free list */
+ INIT_HLIST_NODE(&ri->uflist);
+ hlist_add_head(&ri->uflist, &ri->rp->free_instances);
+ } else
+ /* Unregistering */
+ kfree(ri);
+}
+
+struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk)
+{
+ return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
+}
+
+struct kretprobe_instance *get_rp_inst_tsk(struct task_struct *tk)
+{
+ struct task_struct *tsk;
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct kretprobe_instance *ri;
+
+ head = &kretprobe_inst_table[hash_ptr(tk, KPROBE_HASH_BITS)];
+
+ hlist_for_each_entry(ri, node, head, hlist) {
+ tsk = arch_get_kprobe_task(ri->stack_addr);
+ if (tsk == tk)
+ return ri;
+ }
+ return NULL;
+}
+
+/*
+ * This function is called from do_exit or do_execv when task tk's stack is
+ * about to be recycled. Recycle any function-return probe instances
+ * associated with this task. These represent probed functions that have
+ * been called but may never return.
+ */
+void kprobe_flush_task(struct task_struct *tk)
+{
+ unsigned long flags = 0;
+ spin_lock_irqsave(&kprobe_lock, flags);
+ arch_kprobe_flush_task(tk);
+ spin_unlock_irqrestore(&kprobe_lock, flags);
+}
+
+/*
+ * This kprobe pre_handler is registered with every kretprobe. When probe
+ * hits it will set up the return probe.
+ */
+static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kretprobe *rp = container_of(p, struct kretprobe, kp);
+
+ /*TODO: consider to only swap the RA after the last pre_handler fired */
+ arch_prepare_kretprobe(rp, regs);
+ return 0;
+}
+
+static inline void free_rp_inst(struct kretprobe *rp)
+{
+ struct kretprobe_instance *ri;
+ while ((ri = get_free_rp_inst(rp)) != NULL) {
+ hlist_del(&ri->uflist);
+ kfree(ri);
+ }
+}
+
+/*
+ * Keep all fields in the kprobe consistent
+ */
+static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
+{
+ memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
+ memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
+}
+
+/*
+* Add the new probe to old_p->list. Fail if this is the
+* second jprobe at the address - two jprobes can't coexist
+*/
+static int add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
+{
+ struct kprobe *kp;
+
+ if (p->break_handler) {
+ list_for_each_entry(kp, &old_p->list, list) {
+ if (kp->break_handler)
+ return -EEXIST;
+ }
+ list_add_tail(&p->list, &old_p->list);
+ } else
+ list_add(&p->list, &old_p->list);
+ return 0;
+}
+
/*
* Fill in the required fields of the "manager kprobe". Replace the
* earlier kprobe in the hlist with the manager kprobe
*/
static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
{
+ copy_kprobe(p, ap);
ap->addr = p->addr;
- ap->opcode = p->opcode;
- memcpy(&ap->ainsn, &p->ainsn, sizeof(struct arch_specific_insn));
-
ap->pre_handler = aggr_pre_handler;
ap->post_handler = aggr_post_handler;
ap->fault_handler = aggr_fault_handler;
+ ap->break_handler = aggr_break_handler;
INIT_LIST_HEAD(&ap->list);
list_add(&p->list, &ap->list);
@@ -153,16 +331,16 @@ static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p)
int ret = 0;
struct kprobe *ap;
- if (old_p->break_handler || p->break_handler) {
- ret = -EEXIST; /* kprobe and jprobe can't (yet) coexist */
- } else if (old_p->pre_handler == aggr_pre_handler) {
- list_add(&p->list, &old_p->list);
+ if (old_p->pre_handler == aggr_pre_handler) {
+ copy_kprobe(old_p, p);
+ ret = add_new_kprobe(old_p, p);
} else {
ap = kcalloc(1, sizeof(struct kprobe), GFP_ATOMIC);
if (!ap)
return -ENOMEM;
add_aggr_kprobe(ap, old_p);
- list_add(&p->list, &ap->list);
+ copy_kprobe(ap, p);
+ ret = add_new_kprobe(ap, p);
}
return ret;
}
@@ -170,10 +348,8 @@ static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p)
/* kprobe removal house-keeping routines */
static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags)
{
- *p->addr = p->opcode;
+ arch_disarm_kprobe(p);
hlist_del(&p->hlist);
- flush_icache_range((unsigned long) p->addr,
- (unsigned long) p->addr + sizeof(kprobe_opcode_t));
spin_unlock_irqrestore(&kprobe_lock, flags);
arch_remove_kprobe(p);
}
@@ -200,6 +376,7 @@ int register_kprobe(struct kprobe *p)
}
spin_lock_irqsave(&kprobe_lock, flags);
old_p = get_kprobe(p->addr);
+ p->nmissed = 0;
if (old_p) {
ret = register_aggr_kprobe(old_p, p);
goto out;
@@ -210,10 +387,8 @@ int register_kprobe(struct kprobe *p)
hlist_add_head(&p->hlist,
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
- p->opcode = *p->addr;
- *p->addr = BREAKPOINT_INSTRUCTION;
- flush_icache_range((unsigned long) p->addr,
- (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+ arch_arm_kprobe(p);
+
out:
spin_unlock_irqrestore(&kprobe_lock, flags);
rm_kprobe:
@@ -257,16 +432,82 @@ void unregister_jprobe(struct jprobe *jp)
unregister_kprobe(&jp->kp);
}
+#ifdef ARCH_SUPPORTS_KRETPROBES
+
+int register_kretprobe(struct kretprobe *rp)
+{
+ int ret = 0;
+ struct kretprobe_instance *inst;
+ int i;
+
+ rp->kp.pre_handler = pre_handler_kretprobe;
+
+ /* Pre-allocate memory for max kretprobe instances */
+ if (rp->maxactive <= 0) {
+#ifdef CONFIG_PREEMPT
+ rp->maxactive = max(10, 2 * NR_CPUS);
+#else
+ rp->maxactive = NR_CPUS;
+#endif
+ }
+ INIT_HLIST_HEAD(&rp->used_instances);
+ INIT_HLIST_HEAD(&rp->free_instances);
+ for (i = 0; i < rp->maxactive; i++) {
+ inst = kmalloc(sizeof(struct kretprobe_instance), GFP_KERNEL);
+ if (inst == NULL) {
+ free_rp_inst(rp);
+ return -ENOMEM;
+ }
+ INIT_HLIST_NODE(&inst->uflist);
+ hlist_add_head(&inst->uflist, &rp->free_instances);
+ }
+
+ rp->nmissed = 0;
+ /* Establish function entry probe point */
+ if ((ret = register_kprobe(&rp->kp)) != 0)
+ free_rp_inst(rp);
+ return ret;
+}
+
+#else /* ARCH_SUPPORTS_KRETPROBES */
+
+int register_kretprobe(struct kretprobe *rp)
+{
+ return -ENOSYS;
+}
+
+#endif /* ARCH_SUPPORTS_KRETPROBES */
+
+void unregister_kretprobe(struct kretprobe *rp)
+{
+ unsigned long flags;
+ struct kretprobe_instance *ri;
+
+ unregister_kprobe(&rp->kp);
+ /* No race here */
+ spin_lock_irqsave(&kprobe_lock, flags);
+ free_rp_inst(rp);
+ while ((ri = get_used_rp_inst(rp)) != NULL) {
+ ri->rp = NULL;
+ hlist_del(&ri->uflist);
+ }
+ spin_unlock_irqrestore(&kprobe_lock, flags);
+}
+
static int __init init_kprobes(void)
{
int i, err = 0;
/* FIXME allocate the probe table, currently defined statically */
/* initialize all list heads */
- for (i = 0; i < KPROBE_TABLE_SIZE; i++)
+ for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
INIT_HLIST_HEAD(&kprobe_table[i]);
+ INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
+ }
err = register_die_notifier(&kprobe_exceptions_nb);
+ /* Register the trampoline probe for return probe */
+ register_kprobe(&trampoline_p);
return err;
}
@@ -277,3 +518,6 @@ EXPORT_SYMBOL_GPL(unregister_kprobe);
EXPORT_SYMBOL_GPL(register_jprobe);
EXPORT_SYMBOL_GPL(unregister_jprobe);
EXPORT_SYMBOL_GPL(jprobe_return);
+EXPORT_SYMBOL_GPL(register_kretprobe);
+EXPORT_SYMBOL_GPL(unregister_kretprobe);
+
diff --git a/kernel/module.c b/kernel/module.c
index a566745dde6..068e271ab3a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -35,6 +35,7 @@
#include <linux/notifier.h>
#include <linux/stop_machine.h>
#include <linux/device.h>
+#include <linux/string.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <asm/cacheflush.h>
@@ -370,6 +371,43 @@ static inline void percpu_modcopy(void *pcpudst, const void *src,
#endif /* CONFIG_SMP */
#ifdef CONFIG_MODULE_UNLOAD
+#define MODINFO_ATTR(field) \
+static void setup_modinfo_##field(struct module *mod, const char *s) \
+{ \
+ mod->field = kstrdup(s, GFP_KERNEL); \
+} \
+static ssize_t show_modinfo_##field(struct module_attribute *mattr, \
+ struct module *mod, char *buffer) \
+{ \
+ return sprintf(buffer, "%s\n", mod->field); \
+} \
+static int modinfo_##field##_exists(struct module *mod) \
+{ \
+ return mod->field != NULL; \
+} \
+static void free_modinfo_##field(struct module *mod) \
+{ \
+ kfree(mod->field); \
+ mod->field = NULL; \
+} \
+static struct module_attribute modinfo_##field = { \
+ .attr = { .name = __stringify(field), .mode = 0444, \
+ .owner = THIS_MODULE }, \
+ .show = show_modinfo_##field, \
+ .setup = setup_modinfo_##field, \
+ .test = modinfo_##field##_exists, \
+ .free = free_modinfo_##field, \
+};
+
+MODINFO_ATTR(version);
+MODINFO_ATTR(srcversion);
+
+static struct module_attribute *modinfo_attrs[] = {
+ &modinfo_version,
+ &modinfo_srcversion,
+ NULL,
+};
+
/* Init the unload section of the module. */
static void module_unload_init(struct module *mod)
{
@@ -692,7 +730,7 @@ static int obsparm_copy_string(const char *val, struct kernel_param *kp)
return 0;
}
-int set_obsolete(const char *val, struct kernel_param *kp)
+static int set_obsolete(const char *val, struct kernel_param *kp)
{
unsigned int min, max;
unsigned int size, maxsize;
@@ -1031,6 +1069,32 @@ static void module_remove_refcnt_attr(struct module *mod)
}
#endif
+#ifdef CONFIG_MODULE_UNLOAD
+static int module_add_modinfo_attrs(struct module *mod)
+{
+ struct module_attribute *attr;
+ int error = 0;
+ int i;
+
+ for (i = 0; (attr = modinfo_attrs[i]) && !error; i++) {
+ if (!attr->test ||
+ (attr->test && attr->test(mod)))
+ error = sysfs_create_file(&mod->mkobj.kobj,&attr->attr);
+ }
+ return error;
+}
+
+static void module_remove_modinfo_attrs(struct module *mod)
+{
+ struct module_attribute *attr;
+ int i;
+
+ for (i = 0; (attr = modinfo_attrs[i]); i++) {
+ sysfs_remove_file(&mod->mkobj.kobj,&attr->attr);
+ attr->free(mod);
+ }
+}
+#endif
static int mod_sysfs_setup(struct module *mod,
struct kernel_param *kparam,
@@ -1056,6 +1120,12 @@ static int mod_sysfs_setup(struct module *mod,
if (err)
goto out_unreg;
+#ifdef CONFIG_MODULE_UNLOAD
+ err = module_add_modinfo_attrs(mod);
+ if (err)
+ goto out_unreg;
+#endif
+
return 0;
out_unreg:
@@ -1066,6 +1136,9 @@ out:
static void mod_kobject_remove(struct module *mod)
{
+#ifdef CONFIG_MODULE_UNLOAD
+ module_remove_modinfo_attrs(mod);
+#endif
module_remove_refcnt_attr(mod);
module_param_sysfs_remove(mod);
@@ -1311,6 +1384,23 @@ static char *get_modinfo(Elf_Shdr *sechdrs,
return NULL;
}
+#ifdef CONFIG_MODULE_UNLOAD
+static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs,
+ unsigned int infoindex)
+{
+ struct module_attribute *attr;
+ int i;
+
+ for (i = 0; (attr = modinfo_attrs[i]); i++) {
+ if (attr->setup)
+ attr->setup(mod,
+ get_modinfo(sechdrs,
+ infoindex,
+ attr->attr.name));
+ }
+}
+#endif
+
#ifdef CONFIG_KALLSYMS
int is_exported(const char *name, const struct module *mod)
{
@@ -1615,6 +1705,11 @@ static struct module *load_module(void __user *umod,
/* Set up license info based on the info section */
set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
+#ifdef CONFIG_MODULE_UNLOAD
+ /* Set up MODINFO_ATTR fields */
+ setup_modinfo(mod, sechdrs, infoindex);
+#endif
+
/* Fix up syms, so that st_value is a pointer to location. */
err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex,
mod);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index cabb63fc9e1..5b7b4736d82 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -89,23 +89,6 @@ static struct idr posix_timers_id;
static DEFINE_SPINLOCK(idr_lock);
/*
- * Just because the timer is not in the timer list does NOT mean it is
- * inactive. It could be in the "fire" routine getting a new expire time.
- */
-#define TIMER_INACTIVE 1
-
-#ifdef CONFIG_SMP
-# define timer_active(tmr) \
- ((tmr)->it.real.timer.entry.prev != (void *)TIMER_INACTIVE)
-# define set_timer_inactive(tmr) \
- do { \
- (tmr)->it.real.timer.entry.prev = (void *)TIMER_INACTIVE; \
- } while (0)
-#else
-# define timer_active(tmr) BARFY // error to use outside of SMP
-# define set_timer_inactive(tmr) do { } while (0)
-#endif
-/*
* we assume that the new SIGEV_THREAD_ID shares no bits with the other
* SIGEV values. Here we put out an error if this assumption fails.
*/
@@ -226,7 +209,6 @@ static inline int common_timer_create(struct k_itimer *new_timer)
init_timer(&new_timer->it.real.timer);
new_timer->it.real.timer.data = (unsigned long) new_timer;
new_timer->it.real.timer.function = posix_timer_fn;
- set_timer_inactive(new_timer);
return 0;
}
@@ -480,7 +462,6 @@ static void posix_timer_fn(unsigned long __data)
int do_notify = 1;
spin_lock_irqsave(&timr->it_lock, flags);
- set_timer_inactive(timr);
if (!list_empty(&timr->it.real.abs_timer_entry)) {
spin_lock(&abs_list.lock);
do {
@@ -983,8 +964,8 @@ common_timer_set(struct k_itimer *timr, int flags,
* careful here. If smp we could be in the "fire" routine which will
* be spinning as we hold the lock. But this is ONLY an SMP issue.
*/
+ if (try_to_del_timer_sync(&timr->it.real.timer) < 0) {
#ifdef CONFIG_SMP
- if (timer_active(timr) && !del_timer(&timr->it.real.timer))
/*
* It can only be active if on an other cpu. Since
* we have cleared the interval stuff above, it should
@@ -994,11 +975,9 @@ common_timer_set(struct k_itimer *timr, int flags,
* a "retry" exit status.
*/
return TIMER_RETRY;
-
- set_timer_inactive(timr);
-#else
- del_timer(&timr->it.real.timer);
#endif
+ }
+
remove_from_abslist(timr);
timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
@@ -1083,8 +1062,9 @@ retry:
static inline int common_timer_del(struct k_itimer *timer)
{
timer->it.real.incr = 0;
+
+ if (try_to_del_timer_sync(&timer->it.real.timer) < 0) {
#ifdef CONFIG_SMP
- if (timer_active(timer) && !del_timer(&timer->it.real.timer))
/*
* It can only be active if on an other cpu. Since
* we have cleared the interval stuff above, it should
@@ -1094,9 +1074,9 @@ static inline int common_timer_del(struct k_itimer *timer)
* a "retry" exit status.
*/
return TIMER_RETRY;
-#else
- del_timer(&timer->it.real.timer);
#endif
+ }
+
remove_from_abslist(timer);
return 0;
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 90b3b68dee3..53f9f8720ee 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -81,7 +81,7 @@ static int nr_copy_pages_check;
extern char resume_file[];
/* Local variables that should not be affected by save */
-unsigned int nr_copy_pages __nosavedata = 0;
+static unsigned int nr_copy_pages __nosavedata = 0;
/* Suspend pagedir is allocated before final copy, therefore it
must be freed after resume
diff --git a/kernel/printk.c b/kernel/printk.c
index 01b58d7d17f..3a442bfb8be 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -876,8 +876,10 @@ void register_console(struct console * console)
break;
console->flags |= CON_ENABLED;
console->index = console_cmdline[i].index;
- if (i == preferred_console)
+ if (i == selected_console) {
console->flags |= CON_CONSDEV;
+ preferred_console = selected_console;
+ }
break;
}
@@ -897,6 +899,8 @@ void register_console(struct console * console)
if ((console->flags & CON_CONSDEV) || console_drivers == NULL) {
console->next = console_drivers;
console_drivers = console;
+ if (console->next)
+ console->next->flags &= ~CON_CONSDEV;
} else {
console->next = console_drivers->next;
console_drivers->next = console;
@@ -937,10 +941,14 @@ int unregister_console(struct console * console)
/* If last console is removed, we re-enable picking the first
* one that gets registered. Without that, pmac early boot console
* would prevent fbcon from taking over.
+ *
+ * If this isn't the last console and it has CON_CONSDEV set, we
+ * need to set it on the next preferred console.
*/
if (console_drivers == NULL)
preferred_console = selected_console;
-
+ else if (console->flags & CON_CONSDEV)
+ console_drivers->flags |= CON_CONSDEV;
release_console_sem();
return res;
diff --git a/kernel/sched.c b/kernel/sched.c
index deca041fc36..76080d142e3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2576,7 +2576,7 @@ void fastcall add_preempt_count(int val)
/*
* Underflow?
*/
- BUG_ON(((int)preempt_count() < 0));
+ BUG_ON((preempt_count() < 0));
preempt_count() += val;
/*
* Spinlock count overflowing soon?
@@ -2869,7 +2869,7 @@ need_resched:
int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key)
{
- task_t *p = curr->task;
+ task_t *p = curr->private;
return try_to_wake_up(p, mode, sync);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index c89821b69ae..d1258729a5f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -213,6 +213,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
fastcall void recalc_sigpending_tsk(struct task_struct *t)
{
if (t->signal->group_stop_count > 0 ||
+ (t->flags & PF_FREEZE) ||
PENDING(&t->pending, &t->blocked) ||
PENDING(&t->signal->shared_pending, &t->blocked))
set_tsk_thread_flag(t, TIF_SIGPENDING);
diff --git a/kernel/sys.c b/kernel/sys.c
index f006632c2ba..da24bc1292d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -525,7 +525,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
}
if (new_egid != old_egid)
{
- current->mm->dumpable = 0;
+ current->mm->dumpable = suid_dumpable;
smp_wmb();
}
if (rgid != (gid_t) -1 ||
@@ -556,7 +556,7 @@ asmlinkage long sys_setgid(gid_t gid)
{
if(old_egid != gid)
{
- current->mm->dumpable=0;
+ current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->gid = current->egid = current->sgid = current->fsgid = gid;
@@ -565,7 +565,7 @@ asmlinkage long sys_setgid(gid_t gid)
{
if(old_egid != gid)
{
- current->mm->dumpable=0;
+ current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->egid = current->fsgid = gid;
@@ -596,7 +596,7 @@ static int set_user(uid_t new_ruid, int dumpclear)
if(dumpclear)
{
- current->mm->dumpable = 0;
+ current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->uid = new_ruid;
@@ -653,7 +653,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
if (new_euid != old_euid)
{
- current->mm->dumpable=0;
+ current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->fsuid = current->euid = new_euid;
@@ -703,7 +703,7 @@ asmlinkage long sys_setuid(uid_t uid)
if (old_euid != uid)
{
- current->mm->dumpable = 0;
+ current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->fsuid = current->euid = uid;
@@ -748,7 +748,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
if (euid != (uid_t) -1) {
if (euid != current->euid)
{
- current->mm->dumpable = 0;
+ current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->euid = euid;
@@ -798,7 +798,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
if (egid != (gid_t) -1) {
if (egid != current->egid)
{
- current->mm->dumpable = 0;
+ current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->egid = egid;
@@ -845,7 +845,7 @@ asmlinkage long sys_setfsuid(uid_t uid)
{
if (uid != old_fsuid)
{
- current->mm->dumpable = 0;
+ current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->fsuid = uid;
@@ -875,7 +875,7 @@ asmlinkage long sys_setfsgid(gid_t gid)
{
if (gid != old_fsgid)
{
- current->mm->dumpable = 0;
+ current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->fsgid = gid;
@@ -894,35 +894,69 @@ asmlinkage long sys_times(struct tms __user * tbuf)
*/
if (tbuf) {
struct tms tmp;
- struct task_struct *tsk = current;
- struct task_struct *t;
cputime_t utime, stime, cutime, cstime;
- read_lock(&tasklist_lock);
- utime = tsk->signal->utime;
- stime = tsk->signal->stime;
- t = tsk;
- do {
- utime = cputime_add(utime, t->utime);
- stime = cputime_add(stime, t->stime);
- t = next_thread(t);
- } while (t != tsk);
-
- /*
- * While we have tasklist_lock read-locked, no dying thread
- * can be updating current->signal->[us]time. Instead,
- * we got their counts included in the live thread loop.
- * However, another thread can come in right now and
- * do a wait call that updates current->signal->c[us]time.
- * To make sure we always see that pair updated atomically,
- * we take the siglock around fetching them.
- */
- spin_lock_irq(&tsk->sighand->siglock);
- cutime = tsk->signal->cutime;
- cstime = tsk->signal->cstime;
- spin_unlock_irq(&tsk->sighand->siglock);
- read_unlock(&tasklist_lock);
+#ifdef CONFIG_SMP
+ if (thread_group_empty(current)) {
+ /*
+ * Single thread case without the use of any locks.
+ *
+ * We may race with release_task if two threads are
+ * executing. However, release task first adds up the
+ * counters (__exit_signal) before removing the task
+ * from the process tasklist (__unhash_process).
+ * __exit_signal also acquires and releases the
+ * siglock which results in the proper memory ordering
+ * so that the list modifications are always visible
+ * after the counters have been updated.
+ *
+ * If the counters have been updated by the second thread
+ * but the thread has not yet been removed from the list
+ * then the other branch will be executing which will
+ * block on tasklist_lock until the exit handling of the
+ * other task is finished.
+ *
+ * This also implies that the sighand->siglock cannot
+ * be held by another processor. So we can also
+ * skip acquiring that lock.
+ */
+ utime = cputime_add(current->signal->utime, current->utime);
+ stime = cputime_add(current->signal->utime, current->stime);
+ cutime = current->signal->cutime;
+ cstime = current->signal->cstime;
+ } else
+#endif
+ {
+
+ /* Process with multiple threads */
+ struct task_struct *tsk = current;
+ struct task_struct *t;
+ read_lock(&tasklist_lock);
+ utime = tsk->signal->utime;
+ stime = tsk->signal->stime;
+ t = tsk;
+ do {
+ utime = cputime_add(utime, t->utime);
+ stime = cputime_add(stime, t->stime);
+ t = next_thread(t);
+ } while (t != tsk);
+
+ /*
+ * While we have tasklist_lock read-locked, no dying thread
+ * can be updating current->signal->[us]time. Instead,
+ * we got their counts included in the live thread loop.
+ * However, another thread can come in right now and
+ * do a wait call that updates current->signal->c[us]time.
+ * To make sure we always see that pair updated atomically,
+ * we take the siglock around fetching them.
+ */
+ spin_lock_irq(&tsk->sighand->siglock);
+ cutime = tsk->signal->cutime;
+ cstime = tsk->signal->cstime;
+ spin_unlock_irq(&tsk->sighand->siglock);
+ read_unlock(&tasklist_lock);
+ }
tmp.tms_utime = cputime_to_clock_t(utime);
tmp.tms_stime = cputime_to_clock_t(stime);
tmp.tms_cutime = cputime_to_clock_t(cutime);
@@ -1225,7 +1259,7 @@ static void groups_sort(struct group_info *group_info)
}
/* a simple bsearch */
-static int groups_search(struct group_info *group_info, gid_t grp)
+int groups_search(struct group_info *group_info, gid_t grp)
{
int left, right;
@@ -1652,7 +1686,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
error = 1;
break;
case PR_SET_DUMPABLE:
- if (arg2 != 0 && arg2 != 1) {
+ if (arg2 < 0 || arg2 > 2) {
error = -EINVAL;
break;
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 701d12c6306..24a4d12d5aa 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -58,6 +58,7 @@ extern int sysctl_overcommit_ratio;
extern int max_threads;
extern int sysrq_enabled;
extern int core_uses_pid;
+extern int suid_dumpable;
extern char core_pattern[];
extern int cad_pid;
extern int pid_max;
@@ -950,6 +951,14 @@ static ctl_table fs_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
+ {
+ .ctl_name = KERN_SETUID_DUMPABLE,
+ .procname = "suid_dumpable",
+ .data = &suid_dumpable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{ .ctl_name = 0 }
};
diff --git a/kernel/timer.c b/kernel/timer.c
index 207aa4f0aa1..51ff917c959 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -57,6 +57,11 @@ static void time_interpolator_update(long delta_nsec);
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)
+struct timer_base_s {
+ spinlock_t lock;
+ struct timer_list *running_timer;
+};
+
typedef struct tvec_s {
struct list_head vec[TVN_SIZE];
} tvec_t;
@@ -66,9 +71,8 @@ typedef struct tvec_root_s {
} tvec_root_t;
struct tvec_t_base_s {
- spinlock_t lock;
+ struct timer_base_s t_base;
unsigned long timer_jiffies;
- struct timer_list *running_timer;
tvec_root_t tv1;
tvec_t tv2;
tvec_t tv3;
@@ -77,18 +81,16 @@ struct tvec_t_base_s {
} ____cacheline_aligned_in_smp;
typedef struct tvec_t_base_s tvec_base_t;
+static DEFINE_PER_CPU(tvec_base_t, tvec_bases);
static inline void set_running_timer(tvec_base_t *base,
struct timer_list *timer)
{
#ifdef CONFIG_SMP
- base->running_timer = timer;
+ base->t_base.running_timer = timer;
#endif
}
-/* Fake initialization */
-static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
-
static void check_timer_failed(struct timer_list *timer)
{
static int whine_count;
@@ -103,7 +105,6 @@ static void check_timer_failed(struct timer_list *timer)
/*
* Now fix it up
*/
- spin_lock_init(&timer->lock);
timer->magic = TIMER_MAGIC;
}
@@ -156,65 +157,113 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
list_add_tail(&timer->entry, vec);
}
+typedef struct timer_base_s timer_base_t;
+/*
+ * Used by TIMER_INITIALIZER, we can't use per_cpu(tvec_bases)
+ * at compile time, and we need timer->base to lock the timer.
+ */
+timer_base_t __init_timer_base
+ ____cacheline_aligned_in_smp = { .lock = SPIN_LOCK_UNLOCKED };
+EXPORT_SYMBOL(__init_timer_base);
+
+/***
+ * init_timer - initialize a timer.
+ * @timer: the timer to be initialized
+ *
+ * init_timer() must be done to a timer prior calling *any* of the
+ * other timer functions.
+ */
+void fastcall init_timer(struct timer_list *timer)
+{
+ timer->entry.next = NULL;
+ timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base;
+ timer->magic = TIMER_MAGIC;
+}
+EXPORT_SYMBOL(init_timer);
+
+static inline void detach_timer(struct timer_list *timer,
+ int clear_pending)
+{
+ struct list_head *entry = &timer->entry;
+
+ __list_del(entry->prev, entry->next);
+ if (clear_pending)
+ entry->next = NULL;
+ entry->prev = LIST_POISON2;
+}
+
+/*
+ * We are using hashed locking: holding per_cpu(tvec_bases).t_base.lock
+ * means that all timers which are tied to this base via timer->base are
+ * locked, and the base itself is locked too.
+ *
+ * So __run_timers/migrate_timers can safely modify all timers which could
+ * be found on ->tvX lists.
+ *
+ * When the timer's base is locked, and the timer removed from list, it is
+ * possible to set timer->base = NULL and drop the lock: the timer remains
+ * locked.
+ */
+static timer_base_t *lock_timer_base(struct timer_list *timer,
+ unsigned long *flags)
+{
+ timer_base_t *base;
+
+ for (;;) {
+ base = timer->base;
+ if (likely(base != NULL)) {
+ spin_lock_irqsave(&base->lock, *flags);
+ if (likely(base == timer->base))
+ return base;
+ /* The timer has migrated to another CPU */
+ spin_unlock_irqrestore(&base->lock, *flags);
+ }
+ cpu_relax();
+ }
+}
+
int __mod_timer(struct timer_list *timer, unsigned long expires)
{
- tvec_base_t *old_base, *new_base;
+ timer_base_t *base;
+ tvec_base_t *new_base;
unsigned long flags;
int ret = 0;
BUG_ON(!timer->function);
-
check_timer(timer);
- spin_lock_irqsave(&timer->lock, flags);
+ base = lock_timer_base(timer, &flags);
+
+ if (timer_pending(timer)) {
+ detach_timer(timer, 0);
+ ret = 1;
+ }
+
new_base = &__get_cpu_var(tvec_bases);
-repeat:
- old_base = timer->base;
- /*
- * Prevent deadlocks via ordering by old_base < new_base.
- */
- if (old_base && (new_base != old_base)) {
- if (old_base < new_base) {
- spin_lock(&new_base->lock);
- spin_lock(&old_base->lock);
- } else {
- spin_lock(&old_base->lock);
- spin_lock(&new_base->lock);
- }
+ if (base != &new_base->t_base) {
/*
- * The timer base might have been cancelled while we were
- * trying to take the lock(s):
+ * We are trying to schedule the timer on the local CPU.
+ * However we can't change timer's base while it is running,
+ * otherwise del_timer_sync() can't detect that the timer's
+ * handler yet has not finished. This also guarantees that
+ * the timer is serialized wrt itself.
*/
- if (timer->base != old_base) {
- spin_unlock(&new_base->lock);
- spin_unlock(&old_base->lock);
- goto repeat;
- }
- } else {
- spin_lock(&new_base->lock);
- if (timer->base != old_base) {
- spin_unlock(&new_base->lock);
- goto repeat;
+ if (unlikely(base->running_timer == timer)) {
+ /* The timer remains on a former base */
+ new_base = container_of(base, tvec_base_t, t_base);
+ } else {
+ /* See the comment in lock_timer_base() */
+ timer->base = NULL;
+ spin_unlock(&base->lock);
+ spin_lock(&new_base->t_base.lock);
+ timer->base = &new_base->t_base;
}
}
- /*
- * Delete the previous timeout (if there was any), and install
- * the new one:
- */
- if (old_base) {
- list_del(&timer->entry);
- ret = 1;
- }
timer->expires = expires;
internal_add_timer(new_base, timer);
- timer->base = new_base;
-
- if (old_base && (new_base != old_base))
- spin_unlock(&old_base->lock);
- spin_unlock(&new_base->lock);
- spin_unlock_irqrestore(&timer->lock, flags);
+ spin_unlock_irqrestore(&new_base->t_base.lock, flags);
return ret;
}
@@ -232,15 +281,15 @@ void add_timer_on(struct timer_list *timer, int cpu)
{
tvec_base_t *base = &per_cpu(tvec_bases, cpu);
unsigned long flags;
-
+
BUG_ON(timer_pending(timer) || !timer->function);
check_timer(timer);
- spin_lock_irqsave(&base->lock, flags);
+ spin_lock_irqsave(&base->t_base.lock, flags);
+ timer->base = &base->t_base;
internal_add_timer(base, timer);
- timer->base = base;
- spin_unlock_irqrestore(&base->lock, flags);
+ spin_unlock_irqrestore(&base->t_base.lock, flags);
}
@@ -295,109 +344,84 @@ EXPORT_SYMBOL(mod_timer);
*/
int del_timer(struct timer_list *timer)
{
+ timer_base_t *base;
unsigned long flags;
- tvec_base_t *base;
+ int ret = 0;
check_timer(timer);
-repeat:
- base = timer->base;
- if (!base)
- return 0;
- spin_lock_irqsave(&base->lock, flags);
- if (base != timer->base) {
+ if (timer_pending(timer)) {
+ base = lock_timer_base(timer, &flags);
+ if (timer_pending(timer)) {
+ detach_timer(timer, 1);
+ ret = 1;
+ }
spin_unlock_irqrestore(&base->lock, flags);
- goto repeat;
}
- list_del(&timer->entry);
- /* Need to make sure that anybody who sees a NULL base also sees the list ops */
- smp_wmb();
- timer->base = NULL;
- spin_unlock_irqrestore(&base->lock, flags);
- return 1;
+ return ret;
}
EXPORT_SYMBOL(del_timer);
#ifdef CONFIG_SMP
-/***
- * del_timer_sync - deactivate a timer and wait for the handler to finish.
- * @timer: the timer to be deactivated
- *
- * This function only differs from del_timer() on SMP: besides deactivating
- * the timer it also makes sure the handler has finished executing on other
- * CPUs.
- *
- * Synchronization rules: callers must prevent restarting of the timer,
- * otherwise this function is meaningless. It must not be called from
- * interrupt contexts. The caller must not hold locks which would prevent
- * completion of the timer's handler. Upon exit the timer is not queued and
- * the handler is not running on any CPU.
- *
- * The function returns whether it has deactivated a pending timer or not.
+/*
+ * This function tries to deactivate a timer. Upon successful (ret >= 0)
+ * exit the timer is not queued and the handler is not running on any CPU.
*
- * del_timer_sync() is slow and complicated because it copes with timer
- * handlers which re-arm the timer (periodic timers). If the timer handler
- * is known to not do this (a single shot timer) then use
- * del_singleshot_timer_sync() instead.
+ * It must not be called from interrupt contexts.
*/
-int del_timer_sync(struct timer_list *timer)
+int try_to_del_timer_sync(struct timer_list *timer)
{
- tvec_base_t *base;
- int i, ret = 0;
+ timer_base_t *base;
+ unsigned long flags;
+ int ret = -1;
- check_timer(timer);
+ base = lock_timer_base(timer, &flags);
-del_again:
- ret += del_timer(timer);
+ if (base->running_timer == timer)
+ goto out;
- for_each_online_cpu(i) {
- base = &per_cpu(tvec_bases, i);
- if (base->running_timer == timer) {
- while (base->running_timer == timer) {
- cpu_relax();
- preempt_check_resched();
- }
- break;
- }
+ ret = 0;
+ if (timer_pending(timer)) {
+ detach_timer(timer, 1);
+ ret = 1;
}
- smp_rmb();
- if (timer_pending(timer))
- goto del_again;
+out:
+ spin_unlock_irqrestore(&base->lock, flags);
return ret;
}
-EXPORT_SYMBOL(del_timer_sync);
/***
- * del_singleshot_timer_sync - deactivate a non-recursive timer
+ * del_timer_sync - deactivate a timer and wait for the handler to finish.
* @timer: the timer to be deactivated
*
- * This function is an optimization of del_timer_sync for the case where the
- * caller can guarantee the timer does not reschedule itself in its timer
- * function.
+ * This function only differs from del_timer() on SMP: besides deactivating
+ * the timer it also makes sure the handler has finished executing on other
+ * CPUs.
*
* Synchronization rules: callers must prevent restarting of the timer,
* otherwise this function is meaningless. It must not be called from
- * interrupt contexts. The caller must not hold locks which wold prevent
- * completion of the timer's handler. Upon exit the timer is not queued and
- * the handler is not running on any CPU.
+ * interrupt contexts. The caller must not hold locks which would prevent
+ * completion of the timer's handler. The timer's handler must not call
+ * add_timer_on(). Upon exit the timer is not queued and the handler is
+ * not running on any CPU.
*
* The function returns whether it has deactivated a pending timer or not.
*/
-int del_singleshot_timer_sync(struct timer_list *timer)
+int del_timer_sync(struct timer_list *timer)
{
- int ret = del_timer(timer);
+ check_timer(timer);
- if (!ret) {
- ret = del_timer_sync(timer);
- BUG_ON(ret);
+ for (;;) {
+ int ret = try_to_del_timer_sync(timer);
+ if (ret >= 0)
+ return ret;
}
-
- return ret;
}
-EXPORT_SYMBOL(del_singleshot_timer_sync);
+
+EXPORT_SYMBOL(del_timer_sync);
#endif
static int cascade(tvec_base_t *base, tvec_t *tv, int index)
@@ -415,7 +439,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
struct timer_list *tmp;
tmp = list_entry(curr, struct timer_list, entry);
- BUG_ON(tmp->base != base);
+ BUG_ON(tmp->base != &base->t_base);
curr = curr->next;
internal_add_timer(base, tmp);
}
@@ -437,7 +461,7 @@ static inline void __run_timers(tvec_base_t *base)
{
struct timer_list *timer;
- spin_lock_irq(&base->lock);
+ spin_lock_irq(&base->t_base.lock);
while (time_after_eq(jiffies, base->timer_jiffies)) {
struct list_head work_list = LIST_HEAD_INIT(work_list);
struct list_head *head = &work_list;
@@ -453,8 +477,7 @@ static inline void __run_timers(tvec_base_t *base)
cascade(base, &base->tv5, INDEX(3));
++base->timer_jiffies;
list_splice_init(base->tv1.vec + index, &work_list);
-repeat:
- if (!list_empty(head)) {
+ while (!list_empty(head)) {
void (*fn)(unsigned long);
unsigned long data;
@@ -462,25 +485,26 @@ repeat:
fn = timer->function;
data = timer->data;
- list_del(&timer->entry);
set_running_timer(base, timer);
- smp_wmb();
- timer->base = NULL;
- spin_unlock_irq(&base->lock);
+ detach_timer(timer, 1);
+ spin_unlock_irq(&base->t_base.lock);
{
- u32 preempt_count = preempt_count();
+ int preempt_count = preempt_count();
fn(data);
if (preempt_count != preempt_count()) {
- printk("huh, entered %p with %08x, exited with %08x?\n", fn, preempt_count, preempt_count());
+ printk(KERN_WARNING "huh, entered %p "
+ "with preempt_count %08x, exited"
+ " with %08x?\n",
+ fn, preempt_count,
+ preempt_count());
BUG();
}
}
- spin_lock_irq(&base->lock);
- goto repeat;
+ spin_lock_irq(&base->t_base.lock);
}
}
set_running_timer(base, NULL);
- spin_unlock_irq(&base->lock);
+ spin_unlock_irq(&base->t_base.lock);
}
#ifdef CONFIG_NO_IDLE_HZ
@@ -499,7 +523,7 @@ unsigned long next_timer_interrupt(void)
int i, j;
base = &__get_cpu_var(tvec_bases);
- spin_lock(&base->lock);
+ spin_lock(&base->t_base.lock);
expires = base->timer_jiffies + (LONG_MAX >> 1);
list = 0;
@@ -547,7 +571,7 @@ found:
expires = nte->expires;
}
}
- spin_unlock(&base->lock);
+ spin_unlock(&base->t_base.lock);
return expires;
}
#endif
@@ -1286,9 +1310,9 @@ static void __devinit init_timers_cpu(int cpu)
{
int j;
tvec_base_t *base;
-
+
base = &per_cpu(tvec_bases, cpu);
- spin_lock_init(&base->lock);
+ spin_lock_init(&base->t_base.lock);
for (j = 0; j < TVN_SIZE; j++) {
INIT_LIST_HEAD(base->tv5.vec + j);
INIT_LIST_HEAD(base->tv4.vec + j);
@@ -1302,22 +1326,16 @@ static void __devinit init_timers_cpu(int cpu)
}
#ifdef CONFIG_HOTPLUG_CPU
-static int migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
+static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
{
struct timer_list *timer;
while (!list_empty(head)) {
timer = list_entry(head->next, struct timer_list, entry);
- /* We're locking backwards from __mod_timer order here,
- beware deadlock. */
- if (!spin_trylock(&timer->lock))
- return 0;
- list_del(&timer->entry);
+ detach_timer(timer, 0);
+ timer->base = &new_base->t_base;
internal_add_timer(new_base, timer);
- timer->base = new_base;
- spin_unlock(&timer->lock);
}
- return 1;
}
static void __devinit migrate_timers(int cpu)
@@ -1331,39 +1349,24 @@ static void __devinit migrate_timers(int cpu)
new_base = &get_cpu_var(tvec_bases);
local_irq_disable();
-again:
- /* Prevent deadlocks via ordering by old_base < new_base. */
- if (old_base < new_base) {
- spin_lock(&new_base->lock);
- spin_lock(&old_base->lock);
- } else {
- spin_lock(&old_base->lock);
- spin_lock(&new_base->lock);
- }
+ spin_lock(&new_base->t_base.lock);
+ spin_lock(&old_base->t_base.lock);
- if (old_base->running_timer)
+ if (old_base->t_base.running_timer)
BUG();
for (i = 0; i < TVR_SIZE; i++)
- if (!migrate_timer_list(new_base, old_base->tv1.vec + i))
- goto unlock_again;
- for (i = 0; i < TVN_SIZE; i++)
- if (!migrate_timer_list(new_base, old_base->tv2.vec + i)
- || !migrate_timer_list(new_base, old_base->tv3.vec + i)
- || !migrate_timer_list(new_base, old_base->tv4.vec + i)
- || !migrate_timer_list(new_base, old_base->tv5.vec + i))
- goto unlock_again;
- spin_unlock(&old_base->lock);
- spin_unlock(&new_base->lock);
+ migrate_timer_list(new_base, old_base->tv1.vec + i);
+ for (i = 0; i < TVN_SIZE; i++) {
+ migrate_timer_list(new_base, old_base->tv2.vec + i);
+ migrate_timer_list(new_base, old_base->tv3.vec + i);
+ migrate_timer_list(new_base, old_base->tv4.vec + i);
+ migrate_timer_list(new_base, old_base->tv5.vec + i);
+ }
+
+ spin_unlock(&old_base->t_base.lock);
+ spin_unlock(&new_base->t_base.lock);
local_irq_enable();
put_cpu_var(tvec_bases);
- return;
-
-unlock_again:
- /* Avoid deadlock with __mod_timer, by backing off. */
- spin_unlock(&old_base->lock);
- spin_unlock(&new_base->lock);
- cpu_relax();
- goto again;
}
#endif /* CONFIG_HOTPLUG_CPU */