author    Anton Altaparmakov <aia21@cantab.net>    2005-06-30 09:52:20 +0100
committer Anton Altaparmakov <aia21@cantab.net>    2005-06-30 09:52:20 +0100
commit    c2d9b8387bce8b4a0fd402fab7dc1319d11a418d (patch)
tree      082cf7dd287f61635198011e61c3de1be130cc42 /kernel
parent    2a322e4c08be4e7cb0c04b427ddaaa679fd88863 (diff)
parent    9b4311eedb17fa88f02e4876cd6aa9a08e383cd6 (diff)
Automerge with /usr/src/ntfs-2.6.git.
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/irq/autoprobe.c    9
-rw-r--r--  kernel/irq/handle.c       2
-rw-r--r--  kernel/irq/spurious.c   113
-rw-r--r--  kernel/exit.c             2
-rw-r--r--  kernel/fork.c             5
-rw-r--r--  kernel/itimer.c           8
-rw-r--r--  kernel/kexec.c           10
-rw-r--r--  kernel/kprobes.c        170
-rw-r--r--  kernel/sched.c           17
9 files changed, 262 insertions, 74 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 3ebcd60a19c..9d1b10ed013 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -784,6 +784,8 @@ fastcall NORET_TYPE void do_exit(long code)
profile_task_exit(tsk);
+ WARN_ON(atomic_read(&tsk->fs_excl));
+
if (unlikely(in_interrupt()))
panic("Aiee, killing interrupt handler!");
if (unlikely(!tsk->pid))
diff --git a/kernel/fork.c b/kernel/fork.c
index 2c7806873bf..cdef6cea890 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1090,6 +1090,11 @@ static task_t *copy_process(unsigned long clone_flags,
spin_unlock(&current->sighand->siglock);
}
+ /*
+ * inherit ioprio
+ */
+ p->ioprio = current->ioprio;
+
SET_LINKS(p);
if (unlikely(p->ptrace & PT_PTRACED))
__ptrace_link(p, current->parent);
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 98d62d8efea..3467097ca61 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -9,6 +9,7 @@
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/interrupt.h>
+#include <linux/delay.h>
/*
* Autodetection depends on the fact that any interrupt that
@@ -26,7 +27,7 @@ static DECLARE_MUTEX(probe_sem);
*/
unsigned long probe_irq_on(void)
{
- unsigned long val, delay;
+ unsigned long val;
irq_desc_t *desc;
unsigned int i;
@@ -45,8 +46,7 @@ unsigned long probe_irq_on(void)
}
/* Wait for longstanding interrupts to trigger. */
- for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
- /* about 20ms delay */ barrier();
+ msleep(20);
/*
* enable any unassigned irqs
@@ -68,8 +68,7 @@ unsigned long probe_irq_on(void)
/*
* Wait for spurious interrupts to trigger
*/
- for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
- /* about 100ms delay */ barrier();
+ msleep(100);
/*
* Now filter out any obviously spurious interrupts
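A note on the probe_irq_on() hunks above: the open-coded jiffies loops pegged a CPU for the whole 20ms and 100ms settle windows, and msleep() is safe here because probe_irq_on() runs in process context (it holds probe_sem). A rough userspace analogue of the before/after behaviour, with names of my own choosing rather than kernel API:

#define _POSIX_C_SOURCE 199309L
#include <stdio.h>
#include <time.h>

/* Analogue of the removed pattern: spin on the clock for ~ms
 * milliseconds, keeping one CPU at 100% the whole time. */
static void busy_wait_ms(long ms)
{
        struct timespec start, now;

        clock_gettime(CLOCK_MONOTONIC, &start);
        do {
                clock_gettime(CLOCK_MONOTONIC, &now);
        } while ((now.tv_sec - start.tv_sec) * 1000L +
                 (now.tv_nsec - start.tv_nsec) / 1000000L < ms);
}

/* Analogue of msleep(): block, letting other tasks run meanwhile. */
static void sleep_ms(long ms)
{
        struct timespec req = { ms / 1000, (ms % 1000) * 1000000L };

        nanosleep(&req, NULL);
}

int main(void)
{
        busy_wait_ms(20);       /* what the old loop effectively did */
        sleep_ms(20);           /* what msleep(20) does instead */
        puts("done");
        return 0;
}

Both wait the same wall-clock time; only the sleeping version gives the CPU back to the scheduler while the interrupt lines settle.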
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 436c7d93c00..c29f83c1649 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -172,7 +172,7 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
spin_lock(&desc->lock);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, regs);
if (likely(!(desc->status & IRQ_PENDING)))
break;
desc->status &= ~IRQ_PENDING;
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index ba039e827d5..7df9abd5ec8 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -11,6 +11,83 @@
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
+static int irqfixup;
+
+/*
+ * Recovery handler for misrouted interrupts.
+ */
+
+static int misrouted_irq(int irq, struct pt_regs *regs)
+{
+ int i;
+ irq_desc_t *desc;
+ int ok = 0;
+ int work = 0; /* Did we do work for a real IRQ */
+
+ for(i = 1; i < NR_IRQS; i++) {
+ struct irqaction *action;
+
+ if (i == irq) /* Already tried */
+ continue;
+ desc = &irq_desc[i];
+ spin_lock(&desc->lock);
+ action = desc->action;
+ /* Already running on another processor */
+ if (desc->status & IRQ_INPROGRESS) {
+ /*
+ * Already running: If it is shared get the other
+ * CPU to go looking for our mystery interrupt too
+ */
+ if (desc->action && (desc->action->flags & SA_SHIRQ))
+ desc->status |= IRQ_PENDING;
+ spin_unlock(&desc->lock);
+ continue;
+ }
+ /* Honour the normal IRQ locking */
+ desc->status |= IRQ_INPROGRESS;
+ spin_unlock(&desc->lock);
+ while (action) {
+ /* Only shared IRQ handlers are safe to call */
+ if (action->flags & SA_SHIRQ) {
+ if (action->handler(i, action->dev_id, regs) ==
+ IRQ_HANDLED)
+ ok = 1;
+ }
+ action = action->next;
+ }
+ local_irq_disable();
+ /* Now clean up the flags */
+ spin_lock(&desc->lock);
+ action = desc->action;
+
+ /*
+ * While we were looking for a fixup someone queued a real
+ * IRQ clashing with our walk
+ */
+
+ while ((desc->status & IRQ_PENDING) && action) {
+ /*
+ * Perform real IRQ processing for the IRQ we deferred
+ */
+ work = 1;
+ spin_unlock(&desc->lock);
+ handle_IRQ_event(i, regs, action);
+ spin_lock(&desc->lock);
+ desc->status &= ~IRQ_PENDING;
+ }
+ desc->status &= ~IRQ_INPROGRESS;
+ /*
+ * If we did actual work for the real IRQ line we must let the
+ * IRQ controller clean up too
+ */
+ if(work)
+ desc->handler->end(i);
+ spin_unlock(&desc->lock);
+ }
+ /* So the caller can adjust the irq error counts */
+ return ok;
+}
+
/*
* If 99,900 of the previous 100,000 interrupts have not been handled
* then assume that the IRQ is stuck in some manner. Drop a diagnostic
@@ -31,7 +108,8 @@ __report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
printk(KERN_ERR "irq event %d: bogus return value %x\n",
irq, action_ret);
} else {
- printk(KERN_ERR "irq %d: nobody cared!\n", irq);
+ printk(KERN_ERR "irq %d: nobody cared (try booting with "
+ "the \"irqpoll\" option)\n", irq);
}
dump_stack();
printk(KERN_ERR "handlers:\n");
@@ -55,7 +133,8 @@ static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t actio
}
}
-void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
+void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
+ struct pt_regs *regs)
{
if (action_ret != IRQ_HANDLED) {
desc->irqs_unhandled++;
@@ -63,6 +142,15 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
report_bad_irq(irq, desc, action_ret);
}
+ if (unlikely(irqfixup)) {
+ /* Don't punish working computers */
+ if ((irqfixup == 2 && irq == 0) || action_ret == IRQ_NONE) {
+ int ok = misrouted_irq(irq, regs);
+ if (action_ret == IRQ_NONE)
+ desc->irqs_unhandled -= ok;
+ }
+ }
+
desc->irq_count++;
if (desc->irq_count < 100000)
return;
@@ -94,3 +182,24 @@ int __init noirqdebug_setup(char *str)
__setup("noirqdebug", noirqdebug_setup);
+static int __init irqfixup_setup(char *str)
+{
+ irqfixup = 1;
+ printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
+ printk(KERN_WARNING "This may impact system performance.\n");
+ return 1;
+}
+
+__setup("irqfixup", irqfixup_setup);
+
+static int __init irqpoll_setup(char *str)
+{
+ irqfixup = 2;
+ printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
+ "enabled\n");
+ printk(KERN_WARNING "This may significantly impact system "
+ "performance\n");
+ return 1;
+}
+
+__setup("irqpoll", irqpoll_setup);
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 1dc988e0d2c..a72cb0e5aa4 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -153,11 +153,15 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
switch (which) {
case ITIMER_REAL:
+again:
spin_lock_irq(&tsk->sighand->siglock);
interval = tsk->signal->it_real_incr;
val = it_real_value(tsk->signal);
- if (val)
- del_timer_sync(&tsk->signal->real_timer);
+ /* We are sharing ->siglock with it_real_fn() */
+ if (try_to_del_timer_sync(&tsk->signal->real_timer) < 0) {
+ spin_unlock_irq(&tsk->sighand->siglock);
+ goto again;
+ }
tsk->signal->it_real_incr =
timeval_to_jiffies(&value->it_interval);
it_real_arm(tsk, timeval_to_jiffies(&value->it_value));
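The itimer fix deserves a word: del_timer_sync() waits for a running timer handler to finish, but it_real_fn() itself takes ->siglock, which do_setitimer() already holds, so waiting under the lock can deadlock against the handler. try_to_del_timer_sync() instead fails with a negative result when the handler is mid-flight, and the patch drops the lock and retries from scratch. A compilable userspace sketch of the same drop-and-retry idiom; the mutex and try_cancel() helper are stand-ins of mine, not kernel API:

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t siglock = PTHREAD_MUTEX_INITIALIZER;
static int handler_running;    /* toy state: 1 while the handler runs */

/* Stand-in for try_to_del_timer_sync(): returns -1 instead of
 * blocking when the timer handler is currently executing. */
static int try_cancel(void)
{
        return handler_running ? -1 : 0;
}

static void setitimer_like(void)
{
again:
        pthread_mutex_lock(&siglock);
        if (try_cancel() < 0) {
                /* The handler also takes siglock; spinning here with
                 * the lock held (what del_timer_sync would amount to)
                 * could deadlock. Drop the lock, let the handler
                 * finish, and start over. */
                pthread_mutex_unlock(&siglock);
                sched_yield();
                goto again;
        }
        /* ...update the interval and re-arm the timer under the lock... */
        pthread_mutex_unlock(&siglock);
}

int main(void)
{
        setitimer_like();
        puts("timer re-armed");
        return 0;
}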
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 7843548cf2d..cdd4dcd8fb6 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -241,7 +241,7 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
unsigned long nr_segments,
- struct kexec_segment *segments)
+ struct kexec_segment __user *segments)
{
int result;
struct kimage *image;
@@ -650,7 +650,7 @@ static kimage_entry_t *kimage_dst_used(struct kimage *image,
}
}
- return 0;
+ return NULL;
}
static struct page *kimage_alloc_page(struct kimage *image,
@@ -696,7 +696,7 @@ static struct page *kimage_alloc_page(struct kimage *image,
/* Allocate a page, if we run out of memory give up */
page = kimage_alloc_pages(gfp_mask, 0);
if (!page)
- return 0;
+ return NULL;
/* If the page cannot be used file it away */
if (page_to_pfn(page) >
(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
@@ -754,7 +754,7 @@ static int kimage_load_normal_segment(struct kimage *image,
unsigned long maddr;
unsigned long ubytes, mbytes;
int result;
- unsigned char *buf;
+ unsigned char __user *buf;
result = 0;
buf = segment->buf;
@@ -818,7 +818,7 @@ static int kimage_load_crash_segment(struct kimage *image,
unsigned long maddr;
unsigned long ubytes, mbytes;
int result;
- unsigned char *buf;
+ unsigned char __user *buf;
result = 0;
buf = segment->buf;
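The kexec hunks are sparse(1) annotation fixes: pointers carrying userspace addresses become unsigned char __user *, and pointer-returning functions return NULL instead of 0. Under sparse, __user places the pointer in a separate address space so a direct dereference is rejected; plain gcc expands it to nothing. A minimal standalone illustration, with the checker macro inlined and a toy copy_from_user() of mine standing in for the real one (which validates addresses and handles faults, and uses __force on the cast):

#ifdef __CHECKER__
# define __user __attribute__((noderef, address_space(1)))
#else
# define __user
#endif

#include <string.h>

/* Toy copy_from_user(): the only sanctioned way to read through a
 * __user pointer. Here it just memcpy()s. */
static unsigned long copy_from_user(void *dst, const void __user *src,
                                    unsigned long n)
{
        memcpy(dst, (const void *)src, n);      /* userspace stand-in */
        return 0;
}

int load_segment(const char __user *buf, unsigned long len)
{
        char page[4096];

        /* *buf;  <- sparse would reject this direct dereference */
        if (len > sizeof(page))
                return -1;
        return copy_from_user(page, buf, len) ? -1 : 0;
}

int main(void)
{
        char msg[] = "hello";

        return load_segment((const char __user *)msg, sizeof(msg));
}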
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 334f37472c5..90c0e82b650 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -36,6 +36,7 @@
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/kdebug.h>
@@ -50,6 +51,106 @@ unsigned int kprobe_cpu = NR_CPUS;
static DEFINE_SPINLOCK(kprobe_lock);
static struct kprobe *curr_kprobe;
+/*
+ * kprobe->ainsn.insn points to the copy of the instruction to be
+ * single-stepped. x86_64, POWER4 and above have no-exec support and
+ * stepping on the instruction on a vmalloced/kmalloced/data page
+ * is a recipe for disaster
+ */
+#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+
+struct kprobe_insn_page {
+ struct hlist_node hlist;
+ kprobe_opcode_t *insns; /* Page of instruction slots */
+ char slot_used[INSNS_PER_PAGE];
+ int nused;
+};
+
+static struct hlist_head kprobe_insn_pages;
+
+/**
+ * get_insn_slot() - Find a slot on an executable page for an instruction.
+ * We allocate an executable page if there's no room on existing ones.
+ */
+kprobe_opcode_t *get_insn_slot(void)
+{
+ struct kprobe_insn_page *kip;
+ struct hlist_node *pos;
+
+ hlist_for_each(pos, &kprobe_insn_pages) {
+ kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
+ if (kip->nused < INSNS_PER_PAGE) {
+ int i;
+ for (i = 0; i < INSNS_PER_PAGE; i++) {
+ if (!kip->slot_used[i]) {
+ kip->slot_used[i] = 1;
+ kip->nused++;
+ return kip->insns + (i * MAX_INSN_SIZE);
+ }
+ }
+ /* Surprise! No unused slots. Fix kip->nused. */
+ kip->nused = INSNS_PER_PAGE;
+ }
+ }
+
+ /* All out of space. Need to allocate a new page. Use slot 0.*/
+ kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
+ if (!kip) {
+ return NULL;
+ }
+
+ /*
+ * Use module_alloc so this page is within +/- 2GB of where the
+ * kernel image and loaded module images reside. This is required
+ * so x86_64 can correctly handle the %rip-relative fixups.
+ */
+ kip->insns = module_alloc(PAGE_SIZE);
+ if (!kip->insns) {
+ kfree(kip);
+ return NULL;
+ }
+ INIT_HLIST_NODE(&kip->hlist);
+ hlist_add_head(&kip->hlist, &kprobe_insn_pages);
+ memset(kip->slot_used, 0, INSNS_PER_PAGE);
+ kip->slot_used[0] = 1;
+ kip->nused = 1;
+ return kip->insns;
+}
+
+void free_insn_slot(kprobe_opcode_t *slot)
+{
+ struct kprobe_insn_page *kip;
+ struct hlist_node *pos;
+
+ hlist_for_each(pos, &kprobe_insn_pages) {
+ kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
+ if (kip->insns <= slot &&
+ slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
+ int i = (slot - kip->insns) / MAX_INSN_SIZE;
+ kip->slot_used[i] = 0;
+ kip->nused--;
+ if (kip->nused == 0) {
+ /*
+ * Page is no longer in use. Free it unless
+ * it's the last one. We keep the last one
+ * so as not to have to set it up again the
+ * next time somebody inserts a probe.
+ */
+ hlist_del(&kip->hlist);
+ if (hlist_empty(&kprobe_insn_pages)) {
+ INIT_HLIST_NODE(&kip->hlist);
+ hlist_add_head(&kip->hlist,
+ &kprobe_insn_pages);
+ } else {
+ module_free(NULL, kip->insns);
+ kfree(kip);
+ }
+ }
+ return;
+ }
+ }
+}
+
/* Locks kprobe: irqs must be disabled */
void lock_kprobes(void)
{
@@ -139,12 +240,6 @@ static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
-struct kprobe trampoline_p = {
- .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
- .pre_handler = trampoline_probe_handler,
- .post_handler = trampoline_post_handler
-};
-
struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp)
{
struct hlist_node *node;
@@ -163,35 +258,18 @@ static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp)
return NULL;
}
-struct kretprobe_instance *get_rp_inst(void *sara)
-{
- struct hlist_head *head;
- struct hlist_node *node;
- struct task_struct *tsk;
- struct kretprobe_instance *ri;
-
- tsk = arch_get_kprobe_task(sara);
- head = &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
- hlist_for_each_entry(ri, node, head, hlist) {
- if (ri->stack_addr == sara)
- return ri;
- }
- return NULL;
-}
-
void add_rp_inst(struct kretprobe_instance *ri)
{
- struct task_struct *tsk;
/*
* Remove rp inst off the free list -
* Add it back when probed function returns
*/
hlist_del(&ri->uflist);
- tsk = arch_get_kprobe_task(ri->stack_addr);
+
/* Add rp inst onto table */
INIT_HLIST_NODE(&ri->hlist);
hlist_add_head(&ri->hlist,
- &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]);
+ &kretprobe_inst_table[hash_ptr(ri->task, KPROBE_HASH_BITS)]);
/* Also add this rp inst to the used list. */
INIT_HLIST_NODE(&ri->uflist);
@@ -218,34 +296,25 @@ struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk)
return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
}
-struct kretprobe_instance *get_rp_inst_tsk(struct task_struct *tk)
-{
- struct task_struct *tsk;
- struct hlist_head *head;
- struct hlist_node *node;
- struct kretprobe_instance *ri;
-
- head = &kretprobe_inst_table[hash_ptr(tk, KPROBE_HASH_BITS)];
-
- hlist_for_each_entry(ri, node, head, hlist) {
- tsk = arch_get_kprobe_task(ri->stack_addr);
- if (tsk == tk)
- return ri;
- }
- return NULL;
-}
-
/*
- * This function is called from do_exit or do_execv when task tk's stack is
- * about to be recycled. Recycle any function-return probe instances
- * associated with this task. These represent probed functions that have
- * been called but may never return.
+ * This function is called from exit_thread or flush_thread when task tk's
+ * stack is being recycled so that we can recycle any function-return probe
+ * instances associated with this task. These left over instances represent
+ * probed functions that have been called but will never return.
*/
void kprobe_flush_task(struct task_struct *tk)
{
+ struct kretprobe_instance *ri;
+ struct hlist_head *head;
+ struct hlist_node *node, *tmp;
unsigned long flags = 0;
+
spin_lock_irqsave(&kprobe_lock, flags);
- arch_kprobe_flush_task(tk);
+ head = kretprobe_inst_table_head(current);
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task == tk)
+ recycle_rp_inst(ri);
+ }
spin_unlock_irqrestore(&kprobe_lock, flags);
}
@@ -505,9 +574,10 @@ static int __init init_kprobes(void)
INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
}
- err = register_die_notifier(&kprobe_exceptions_nb);
- /* Register the trampoline probe for return probe */
- register_kprobe(&trampoline_p);
+ err = arch_init();
+ if (!err)
+ err = register_die_notifier(&kprobe_exceptions_nb);
+
return err;
}
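The slot allocator added to kprobes.c above is a small slab-style scheme: pages come from module_alloc() so they are executable and, on x86_64, within +/- 2GB of the kernel image for %rip-relative fixups; each page is carved into MAX_INSN_SIZE slots tracked by a slot_used[] array and an nused count, and an emptied page is freed unless it is the last one, which stays cached for the next probe insertion. Below is a self-contained userspace model of just that bookkeeping; malloc() stands in for module_alloc(), the sizes are hypothetical, and the kprobe_lock serialization is omitted:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE       4096
#define MAX_INSN_SIZE   16              /* hypothetical slot size */
#define SLOTS_PER_PAGE  (PAGE_SIZE / MAX_INSN_SIZE)

struct insn_page {
        struct insn_page *next;
        unsigned char *insns;           /* the page of slots */
        char slot_used[SLOTS_PER_PAGE];
        int nused;
};

static struct insn_page *pages;

static unsigned char *get_slot(void)
{
        struct insn_page *p;
        int i;

        /* First fit: reuse a free slot on an existing page. */
        for (p = pages; p; p = p->next) {
                if (p->nused == SLOTS_PER_PAGE)
                        continue;
                for (i = 0; i < SLOTS_PER_PAGE; i++) {
                        if (!p->slot_used[i]) {
                                p->slot_used[i] = 1;
                                p->nused++;
                                return p->insns + i * MAX_INSN_SIZE;
                        }
                }
        }
        /* All full: allocate a fresh page and hand out slot 0. */
        p = calloc(1, sizeof(*p));
        if (!p || !(p->insns = malloc(PAGE_SIZE))) {
                free(p);
                return NULL;
        }
        p->next = pages;
        pages = p;
        p->slot_used[0] = 1;
        p->nused = 1;
        return p->insns;
}

static void free_slot(unsigned char *slot)
{
        struct insn_page **pp, *p;

        for (pp = &pages; (p = *pp); pp = &p->next) {
                if (slot < p->insns || slot >= p->insns + PAGE_SIZE)
                        continue;
                p->slot_used[(slot - p->insns) / MAX_INSN_SIZE] = 0;
                if (--p->nused == 0 && pages->next) {
                        /* Free an empty page, but keep the last one
                         * cached for the next probe insertion. */
                        *pp = p->next;
                        free(p->insns);
                        free(p);
                }
                return;
        }
}

int main(void)
{
        unsigned char *a = get_slot(), *b = get_slot();

        printf("slots %p %p\n", (void *)a, (void *)b);
        free_slot(a);
        free_slot(b);
        return 0;
}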
diff --git a/kernel/sched.c b/kernel/sched.c
index a07cff90d84..5f2182d4224 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3448,15 +3448,7 @@ int task_nice(const task_t *p)
{
return TASK_NICE(p);
}
-
-/*
- * The only users of task_nice are binfmt_elf and binfmt_elf32.
- * binfmt_elf is no longer modular, but binfmt_elf32 still is.
- * Therefore, task_nice is needed if there is a compat_mode.
- */
-#ifdef CONFIG_COMPAT
EXPORT_SYMBOL_GPL(task_nice);
-#endif
/**
* idle_cpu - is a given cpu idle currently?
@@ -4174,6 +4166,14 @@ void show_state(void)
read_unlock(&tasklist_lock);
}
+/**
+ * init_idle - set up an idle thread for a given CPU
+ * @idle: task in question
+ * @cpu: cpu the idle task belongs to
+ *
+ * NOTE: this function does not set the idle thread's NEED_RESCHED
+ * flag, to make booting more robust.
+ */
void __devinit init_idle(task_t *idle, int cpu)
{
runqueue_t *rq = cpu_rq(cpu);
@@ -4191,7 +4191,6 @@ void __devinit init_idle(task_t *idle, int cpu)
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
idle->oncpu = 1;
#endif
- set_tsk_need_resched(idle);
spin_unlock_irqrestore(&rq->lock, flags);
/* Set the preempt count _outside_ the spinlocks! */