author     James Bottomley <jejb@mulgrave.il.steeleye.com>  2007-05-30 23:57:05 -0500
committer  James Bottomley <jejb@mulgrave.il.steeleye.com>  2007-05-30 23:57:05 -0500
commit     5bc65793cbf8da0d35f19ef025dda22887e79e80 (patch)
tree       8291998abd73055de6f487fafa174ee2a5d3afee /kernel
parent     6edae708bf77e012d855a7e2c7766f211d234f4f (diff)
parent     3f0a6766e0cc5a577805732e5adb50a585c58175 (diff)
[SCSI] Merge up to linux-2.6 head
Conflicts:

	drivers/scsi/jazz_esp.c

Same changes made by both SCSI and SPARC trees: problem with UTF-8
conversion in the copyright.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/exit.c                  7
-rw-r--r--  kernel/fork.c                  9
-rw-r--r--  kernel/irq/spurious.c         46
-rw-r--r--  kernel/kallsyms.c              3
-rw-r--r--  kernel/kthread.c               7
-rw-r--r--  kernel/power/disk.c            3
-rw-r--r--  kernel/power/main.c           19
-rw-r--r--  kernel/power/process.c        57
-rw-r--r--  kernel/power/swap.c            2
-rw-r--r--  kernel/profile.c               1
-rw-r--r--  kernel/sched.c                 4
-rw-r--r--  kernel/signal.c               24
-rw-r--r--  kernel/sysctl.c                2
-rw-r--r--  kernel/time/ntp.c              2
-rw-r--r--  kernel/time/tick-broadcast.c  17
-rw-r--r--  kernel/time/tick-sched.c      28
-rw-r--r--  kernel/timer.c                10
-rw-r--r--  kernel/workqueue.c            84
18 files changed, 203 insertions, 122 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index c6d14b8008d..5b888c24e43 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -762,11 +762,8 @@ static void exit_notify(struct task_struct *tsk)
read_lock(&tasklist_lock);
spin_lock_irq(&tsk->sighand->siglock);
for (t = next_thread(tsk); t != tsk; t = next_thread(t))
- if (!signal_pending(t) && !(t->flags & PF_EXITING)) {
- recalc_sigpending_tsk(t);
- if (signal_pending(t))
- signal_wake_up(t, 0);
- }
+ if (!signal_pending(t) && !(t->flags & PF_EXITING))
+ recalc_sigpending_and_wake(t);
spin_unlock_irq(&tsk->sighand->siglock);
read_unlock(&tasklist_lock);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 49530e40ea8..73ad5cda1bc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -45,6 +45,7 @@
#include <linux/acct.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
+#include <linux/freezer.h>
#include <linux/delayacct.h>
#include <linux/taskstats_kern.h>
#include <linux/random.h>
@@ -1405,7 +1406,9 @@ long do_fork(unsigned long clone_flags,
}
if (clone_flags & CLONE_VFORK) {
+ freezer_do_not_count();
wait_for_completion(&vfork);
+ freezer_count();
if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
current->ptrace_message = nr;
ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
@@ -1427,10 +1430,8 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep,
{
struct sighand_struct *sighand = data;
- if (flags & SLAB_CTOR_CONSTRUCTOR) {
- spin_lock_init(&sighand->siglock);
- INIT_LIST_HEAD(&sighand->signalfd_list);
- }
+ spin_lock_init(&sighand->siglock);
+ INIT_LIST_HEAD(&sighand->signalfd_list);
}
void __init proc_caches_init(void)
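
The fork.c hunk above brackets the CLONE_VFORK wait with freezer_do_not_count()/freezer_count() so a parent blocked in vfork() does not hold up the freezer. A minimal sketch of the same bracket, not part of this patch; the function and completion names are invented for illustration:

#include <linux/completion.h>
#include <linux/freezer.h>

static DECLARE_COMPLETION(example_done);

/* Wait for example_done without making the freezer wait for us. */
static void example_wait(void)
{
	freezer_do_not_count();			/* freezer may treat this task as frozen */
	wait_for_completion(&example_done);
	freezer_count();			/* back under normal freezer accounting */
}
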
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index b0d81aae472..bd9e272d55e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -135,6 +135,39 @@ report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
}
}
+static inline int try_misrouted_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
+{
+ struct irqaction *action;
+
+ if (!irqfixup)
+ return 0;
+
+ /* We didn't actually handle the IRQ - see if it was misrouted? */
+ if (action_ret == IRQ_NONE)
+ return 1;
+
+ /*
+ * But for 'irqfixup == 2' we also do it for handled interrupts if
+ * they are marked as IRQF_IRQPOLL (or for irq zero, which is the
+ * traditional PC timer interrupt.. Legacy)
+ */
+ if (irqfixup < 2)
+ return 0;
+
+ if (!irq)
+ return 1;
+
+ /*
+ * Since we don't get the descriptor lock, "action" can
+ * change under us. We don't really care, but we don't
+ * want to follow a NULL pointer. So tell the compiler to
+ * just load it once by using a barrier.
+ */
+ action = desc->action;
+ barrier();
+ return action && (action->flags & IRQF_IRQPOLL);
+}
+
void note_interrupt(unsigned int irq, struct irq_desc *desc,
irqreturn_t action_ret)
{
@@ -144,15 +177,10 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
report_bad_irq(irq, desc, action_ret);
}
- if (unlikely(irqfixup)) {
- /* Don't punish working computers */
- if ((irqfixup == 2 && ((irq == 0) ||
- (desc->action->flags & IRQF_IRQPOLL))) ||
- action_ret == IRQ_NONE) {
- int ok = misrouted_irq(irq);
- if (action_ret == IRQ_NONE)
- desc->irqs_unhandled -= ok;
- }
+ if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
+ int ok = misrouted_irq(irq);
+ if (action_ret == IRQ_NONE)
+ desc->irqs_unhandled -= ok;
}
desc->irq_count++;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index f1bda23140b..fed54418626 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -257,7 +257,8 @@ const char *kallsyms_lookup(unsigned long addr,
pos = get_symbol_pos(addr, symbolsize, offset);
/* Grab name */
kallsyms_expand_symbol(get_symbol_offset(pos), namebuf);
- *modname = NULL;
+ if (modname)
+ *modname = NULL;
return namebuf;
}
diff --git a/kernel/kthread.c b/kernel/kthread.c
index df8a8e8f6ca..bbd51b81a3e 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -70,7 +70,7 @@ static int kthread(void *_create)
data = create->data;
/* OK, tell user we're spawned, wait for stop or wakeup */
- __set_current_state(TASK_INTERRUPTIBLE);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
complete(&create->started);
schedule();
@@ -162,7 +162,10 @@ EXPORT_SYMBOL(kthread_create);
*/
void kthread_bind(struct task_struct *k, unsigned int cpu)
{
- BUG_ON(k->state != TASK_INTERRUPTIBLE);
+ if (k->state != TASK_UNINTERRUPTIBLE) {
+ WARN_ON(1);
+ return;
+ }
/* Must have done schedule() in kthread() before we set_task_cpu */
wait_task_inactive(k);
set_task_cpu(k, cpu);
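
For context, a rough sketch of the kthread lifecycle the hunks above rely on: create the thread, bind it while it still sleeps in TASK_UNINTERRUPTIBLE, wake it, and later stop it with kthread_stop(). This is not from the patch; all names are illustrative only.

#include <linux/kthread.h>
#include <linux/err.h>
#include <linux/delay.h>

static struct task_struct *example_task;

static int example_thread_fn(void *data)
{
	while (!kthread_should_stop()) {	/* exit once kthread_stop() is called */
		/* ... do periodic work ... */
		msleep(100);
	}
	return 0;
}

static int example_start(unsigned int cpu)
{
	example_task = kthread_create(example_thread_fn, NULL, "example/%u", cpu);
	if (IS_ERR(example_task))
		return PTR_ERR(example_task);
	kthread_bind(example_task, cpu);	/* legal: the thread has not been woken yet */
	wake_up_process(example_task);
	return 0;
}

static void example_stop(void)
{
	kthread_stop(example_task);		/* waits for example_thread_fn() to return */
}
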
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index b5f0543ed84..f445b9cd60f 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -416,7 +416,8 @@ static ssize_t disk_store(struct kset *kset, const char *buf, size_t n)
mutex_lock(&pm_mutex);
for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
- if (!strncmp(buf, hibernation_modes[i], len)) {
+ if (len == strlen(hibernation_modes[i])
+ && !strncmp(buf, hibernation_modes[i], len)) {
mode = i;
break;
}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 40d56a31245..8812985f302 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -97,25 +97,26 @@ static int suspend_prepare(suspend_state_t state)
}
}
- if (pm_ops->prepare) {
- if ((error = pm_ops->prepare(state)))
- goto Thaw;
- }
-
suspend_console();
error = device_suspend(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "Some devices failed to suspend\n");
- goto Resume_devices;
+ goto Resume_console;
}
+ if (pm_ops->prepare) {
+ if ((error = pm_ops->prepare(state)))
+ goto Resume_devices;
+ }
+
error = disable_nonboot_cpus();
if (!error)
return 0;
enable_nonboot_cpus();
- Resume_devices:
pm_finish(state);
+ Resume_devices:
device_resume();
+ Resume_console:
resume_console();
Thaw:
thaw_processes();
@@ -289,13 +290,13 @@ static ssize_t state_store(struct kset *kset, const char *buf, size_t n)
len = p ? p - buf : n;
/* First, check if we are requested to hibernate */
- if (!strncmp(buf, "disk", len)) {
+ if (len == 4 && !strncmp(buf, "disk", len)) {
error = hibernate();
return error ? error : n;
}
for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
- if (*s && !strncmp(buf, *s, len))
+ if (*s && len == strlen(*s) && !strncmp(buf, *s, len))
break;
}
if (state < PM_SUSPEND_MAX && *s)
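
The two sysfs-store hunks above tighten the string matching: with a user-controlled length, strncmp() alone accepts a short prefix such as "di" for "disk". A small userspace demonstration of why the extra length check is needed (the helper name is hypothetical):

#include <stdio.h>
#include <string.h>

/* Mirrors the fixed check: exact length first, then compare. */
static int matches_mode(const char *buf, size_t len, const char *mode)
{
	return len == strlen(mode) && !strncmp(buf, mode, len);
}

int main(void)
{
	printf("%d\n", !strncmp("di", "disk", 2));	 /* 1: bare strncmp matches the prefix */
	printf("%d\n", matches_mode("di", 2, "disk"));	 /* 0: rejected by the length check */
	printf("%d\n", matches_mode("disk", 4, "disk")); /* 1: exact match still accepted */
	return 0;
}
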
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 08841938738..e0233d8422b 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -31,16 +31,36 @@ static inline int freezeable(struct task_struct * p)
return 1;
}
+/*
+ * freezing is complete, mark current process as frozen
+ */
+static inline void frozen_process(void)
+{
+ if (!unlikely(current->flags & PF_NOFREEZE)) {
+ current->flags |= PF_FROZEN;
+ wmb();
+ }
+ clear_tsk_thread_flag(current, TIF_FREEZE);
+}
+
/* Refrigerator is place where frozen processes are stored :-). */
void refrigerator(void)
{
/* Hmm, should we be allowed to suspend when there are realtime
processes around? */
long save;
+
+ task_lock(current);
+ if (freezing(current)) {
+ frozen_process();
+ task_unlock(current);
+ } else {
+ task_unlock(current);
+ return;
+ }
save = current->state;
pr_debug("%s entered refrigerator\n", current->comm);
- frozen_process(current);
spin_lock_irq(&current->sighand->siglock);
recalc_sigpending(); /* We sent fake signal, clean it up */
spin_unlock_irq(&current->sighand->siglock);
@@ -81,7 +101,7 @@ static void cancel_freezing(struct task_struct *p)
pr_debug(" clean up: %s\n", p->comm);
do_not_freeze(p);
spin_lock_irqsave(&p->sighand->siglock, flags);
- recalc_sigpending_tsk(p);
+ recalc_sigpending_and_wake(p);
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
}
@@ -112,22 +132,12 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space)
cancel_freezing(p);
continue;
}
- if (is_user_space(p)) {
- if (!freeze_user_space)
- continue;
-
- /* Freeze the task unless there is a vfork
- * completion pending
- */
- if (!p->vfork_done)
- freeze_process(p);
- } else {
- if (freeze_user_space)
- continue;
-
- freeze_process(p);
- }
- todo++;
+ if (freeze_user_space && !is_user_space(p))
+ continue;
+
+ freeze_process(p);
+ if (!freezer_should_skip(p))
+ todo++;
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
yield(); /* Yield is okay here */
@@ -149,13 +159,16 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space)
TIMEOUT / HZ, todo);
read_lock(&tasklist_lock);
do_each_thread(g, p) {
- if (is_user_space(p) == !freeze_user_space)
+ if (freeze_user_space && !is_user_space(p))
continue;
- if (freezeable(p) && !frozen(p))
+ task_lock(p);
+ if (freezeable(p) && !frozen(p) &&
+ !freezer_should_skip(p))
printk(KERN_ERR " %s\n", p->comm);
cancel_freezing(p);
+ task_unlock(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
}
@@ -200,9 +213,7 @@ static void thaw_tasks(int thaw_user_space)
if (is_user_space(p) == !thaw_user_space)
continue;
- if (!thaw_process(p))
- printk(KERN_WARNING " Strange, %s not stopped\n",
- p->comm );
+ thaw_process(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index b8b235cc19d..8b1a1b83714 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -584,7 +584,7 @@ int swsusp_check(void)
resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
- memset(swsusp_header, 0, sizeof(PAGE_SIZE));
+ memset(swsusp_header, 0, PAGE_SIZE);
error = bio_read_page(swsusp_resume_block,
swsusp_header, NULL);
if (error)
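
The swap.c fix above replaces sizeof(PAGE_SIZE), which is only the size of the constant's type, with PAGE_SIZE itself, so the whole header page actually gets cleared. A tiny userspace illustration, with PAGE_SIZE hard-coded here purely for the example:

#include <stdio.h>

#define PAGE_SIZE 4096UL	/* stand-in for the kernel's page size */

int main(void)
{
	printf("PAGE_SIZE         = %lu bytes\n", PAGE_SIZE);
	printf("sizeof(PAGE_SIZE) = %zu bytes\n", sizeof(PAGE_SIZE));	/* 8 on LP64: the old memset cleared only this much */
	return 0;
}
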
diff --git a/kernel/profile.c b/kernel/profile.c
index cc91b9bf759..5b20fe977be 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -26,6 +26,7 @@
#include <asm/sections.h>
#include <asm/semaphore.h>
#include <asm/irq_regs.h>
+#include <asm/ptrace.h>
struct profile_hit {
u32 pc, hits;
diff --git a/kernel/sched.c b/kernel/sched.c
index 799d23b4e35..13cdab3b4c4 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4775,9 +4775,7 @@ int __sched cond_resched_softirq(void)
BUG_ON(!in_softirq());
if (need_resched() && system_state == SYSTEM_RUNNING) {
- raw_local_irq_disable();
- _local_bh_enable();
- raw_local_irq_enable();
+ local_bh_enable();
__cond_resched();
local_bh_disable();
return 1;
diff --git a/kernel/signal.c b/kernel/signal.c
index 364fc95bf97..acdfc0549c6 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -96,15 +96,27 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
#define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
-fastcall void recalc_sigpending_tsk(struct task_struct *t)
+static int recalc_sigpending_tsk(struct task_struct *t)
{
if (t->signal->group_stop_count > 0 ||
(freezing(t)) ||
PENDING(&t->pending, &t->blocked) ||
- PENDING(&t->signal->shared_pending, &t->blocked))
+ PENDING(&t->signal->shared_pending, &t->blocked)) {
set_tsk_thread_flag(t, TIF_SIGPENDING);
- else
- clear_tsk_thread_flag(t, TIF_SIGPENDING);
+ return 1;
+ }
+ clear_tsk_thread_flag(t, TIF_SIGPENDING);
+ return 0;
+}
+
+/*
+ * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up.
+ * This is superfluous when called on current, the wakeup is a harmless no-op.
+ */
+void recalc_sigpending_and_wake(struct task_struct *t)
+{
+ if (recalc_sigpending_tsk(t))
+ signal_wake_up(t, 0);
}
void recalc_sigpending(void)
@@ -744,7 +756,7 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
action->sa.sa_handler = SIG_DFL;
if (blocked) {
sigdelset(&t->blocked, sig);
- recalc_sigpending_tsk(t);
+ recalc_sigpending_and_wake(t);
}
}
ret = specific_send_sig_info(sig, info, t);
@@ -2273,7 +2285,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
rm_from_queue_full(&mask, &t->signal->shared_pending);
do {
rm_from_queue_full(&mask, &t->pending);
- recalc_sigpending_tsk(t);
+ recalc_sigpending_and_wake(t);
t = next_thread(t);
} while (t != current);
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4073353abd4..30ee462ee79 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -227,7 +227,7 @@ static ctl_table kern_table[] = {
.ctl_name = KERN_CORE_PATTERN,
.procname = "core_pattern",
.data = core_pattern,
- .maxlen = 128,
+ .maxlen = CORENAME_MAX_SIZE,
.mode = 0644,
.proc_handler = &proc_dostring,
.strategy = &sysctl_string,
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index cb25649c6f5..87aa5ff931e 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -11,6 +11,8 @@
#include <linux/mm.h>
#include <linux/time.h>
#include <linux/timex.h>
+#include <linux/jiffies.h>
+#include <linux/hrtimer.h>
#include <asm/div64.h>
#include <asm/timex.h>
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index eadfce2fff7..8001d37071f 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -243,11 +243,18 @@ void tick_broadcast_on_off(unsigned long reason, int *oncpu)
{
int cpu = get_cpu();
- if (cpu == *oncpu)
- tick_do_broadcast_on_off(&reason);
- else
- smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
- &reason, 1, 1);
+ if (!cpu_isset(*oncpu, cpu_online_map)) {
+ printk(KERN_ERR "tick-braodcast: ignoring broadcast for "
+ "offline CPU #%d\n", *oncpu);
+ } else {
+
+ if (cpu == *oncpu)
+ tick_do_broadcast_on_off(&reason);
+ else
+ smp_call_function_single(*oncpu,
+ tick_do_broadcast_on_off,
+ &reason, 1, 1);
+ }
put_cpu();
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3483e6cb954..52db9e3c526 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -167,9 +167,15 @@ void tick_nohz_stop_sched_tick(void)
goto end;
cpu = smp_processor_id();
- if (unlikely(local_softirq_pending()))
- printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
- local_softirq_pending());
+ if (unlikely(local_softirq_pending())) {
+ static int ratelimit;
+
+ if (ratelimit < 10) {
+ printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
+ local_softirq_pending());
+ ratelimit++;
+ }
+ }
now = ktime_get();
/*
@@ -241,6 +247,21 @@ void tick_nohz_stop_sched_tick(void)
if (cpu == tick_do_timer_cpu)
tick_do_timer_cpu = -1;
+ ts->idle_sleeps++;
+
+ /*
+ * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that
+ * there is no timer pending or at least extremly far
+ * into the future (12 days for HZ=1000). In this case
+ * we simply stop the tick timer:
+ */
+ if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) {
+ ts->idle_expires.tv64 = KTIME_MAX;
+ if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
+ hrtimer_cancel(&ts->sched_timer);
+ goto out;
+ }
+
/*
* calculate the expiry time for the next timer wheel
* timer
@@ -248,7 +269,6 @@ void tick_nohz_stop_sched_tick(void)
expires = ktime_add_ns(last_update, tick_period.tv64 *
delta_jiffies);
ts->idle_expires = expires;
- ts->idle_sleeps++;
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start(&ts->sched_timer, expires,
diff --git a/kernel/timer.c b/kernel/timer.c
index 5ec5490f8d8..1a69705c2fb 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -666,7 +666,7 @@ static inline void __run_timers(tvec_base_t *base)
static unsigned long __next_timer_interrupt(tvec_base_t *base)
{
unsigned long timer_jiffies = base->timer_jiffies;
- unsigned long expires = timer_jiffies + (LONG_MAX >> 1);
+ unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
int index, slot, array, found = 0;
struct timer_list *nte;
tvec_t *varray[4];
@@ -752,6 +752,14 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
tsdelta = ktime_to_timespec(hr_delta);
delta = timespec_to_jiffies(&tsdelta);
+
+ /*
+ * Limit the delta to the max value, which is checked in
+ * tick_nohz_stop_sched_tick():
+ */
+ if (delta > NEXT_TIMER_MAX_DELTA)
+ delta = NEXT_TIMER_MAX_DELTA;
+
/*
* Take rounding errors in to account and make sure, that it
* expires in the next tick. Otherwise we go into an endless
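
The timer.c and tick-sched.c hunks above cap the lookahead at NEXT_TIMER_MAX_DELTA; assuming its usual definition of (1UL << 30) - 1 jiffies in linux/jiffies.h, that works out to the "12 days for HZ=1000" mentioned in the comment. A quick back-of-the-envelope check:

#include <stdio.h>

int main(void)
{
	unsigned long next_timer_max_delta = (1UL << 30) - 1;	/* jiffies; assumed definition */
	unsigned long hz = 1000;

	printf("%lu jiffies / %lu HZ = %.1f days\n",
	       next_timer_max_delta, hz,
	       (double)next_timer_max_delta / hz / 86400.0);	/* ~12.4 days */
	return 0;
}
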
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index fb56fedd5c0..3bebf73be97 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -47,7 +47,6 @@ struct cpu_workqueue_struct {
struct workqueue_struct *wq;
struct task_struct *thread;
- int should_stop;
int run_depth; /* Detect run_workqueue() recursion depth */
} ____cacheline_aligned;
@@ -71,7 +70,13 @@ static LIST_HEAD(workqueues);
static int singlethread_cpu __read_mostly;
static cpumask_t cpu_singlethread_map __read_mostly;
-/* optimization, we could use cpu_possible_map */
+/*
+ * _cpu_down() first removes CPU from cpu_online_map, then CPU_DEAD
+ * flushes cwq->worklist. This means that flush_workqueue/wait_on_work
+ * which comes in between can't use for_each_online_cpu(). We could
+ * use cpu_possible_map, the cpumask below is more a documentation
+ * than optimization.
+ */
static cpumask_t cpu_populated_map __read_mostly;
/* If it's single threaded, it isn't in the list of workqueues. */
@@ -272,24 +277,6 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
spin_unlock_irq(&cwq->lock);
}
-/*
- * NOTE: the caller must not touch *cwq if this func returns true
- */
-static int cwq_should_stop(struct cpu_workqueue_struct *cwq)
-{
- int should_stop = cwq->should_stop;
-
- if (unlikely(should_stop)) {
- spin_lock_irq(&cwq->lock);
- should_stop = cwq->should_stop && list_empty(&cwq->worklist);
- if (should_stop)
- cwq->thread = NULL;
- spin_unlock_irq(&cwq->lock);
- }
-
- return should_stop;
-}
-
static int worker_thread(void *__cwq)
{
struct cpu_workqueue_struct *cwq = __cwq;
@@ -302,14 +289,15 @@ static int worker_thread(void *__cwq)
for (;;) {
prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
- if (!freezing(current) && !cwq->should_stop
- && list_empty(&cwq->worklist))
+ if (!freezing(current) &&
+ !kthread_should_stop() &&
+ list_empty(&cwq->worklist))
schedule();
finish_wait(&cwq->more_work, &wait);
try_to_freeze();
- if (cwq_should_stop(cwq))
+ if (kthread_should_stop())
break;
run_workqueue(cwq);
@@ -340,18 +328,21 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
insert_work(cwq, &barr->work, tail);
}
-static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
+static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
{
+ int active;
+
if (cwq->thread == current) {
/*
* Probably keventd trying to flush its own queue. So simply run
* it by hand rather than deadlocking.
*/
run_workqueue(cwq);
+ active = 1;
} else {
struct wq_barrier barr;
- int active = 0;
+ active = 0;
spin_lock_irq(&cwq->lock);
if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
insert_wq_barrier(cwq, &barr, 1);
@@ -362,6 +353,8 @@ static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
if (active)
wait_for_completion(&barr.done);
}
+
+ return active;
}
/**
@@ -674,7 +667,6 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
return PTR_ERR(p);
cwq->thread = p;
- cwq->should_stop = 0;
return 0;
}
@@ -740,29 +732,27 @@ EXPORT_SYMBOL_GPL(__create_workqueue);
static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
- struct wq_barrier barr;
- int alive = 0;
-
- spin_lock_irq(&cwq->lock);
- if (cwq->thread != NULL) {
- insert_wq_barrier(cwq, &barr, 1);
- cwq->should_stop = 1;
- alive = 1;
- }
- spin_unlock_irq(&cwq->lock);
+ /*
+ * Our caller is either destroy_workqueue() or CPU_DEAD,
+ * workqueue_mutex protects cwq->thread
+ */
+ if (cwq->thread == NULL)
+ return;
- if (alive) {
- wait_for_completion(&barr.done);
+ /*
+ * If the caller is CPU_DEAD the single flush_cpu_workqueue()
+ * is not enough, a concurrent flush_workqueue() can insert a
+ * barrier after us.
+ * When ->worklist becomes empty it is safe to exit because no
+ * more work_structs can be queued on this cwq: flush_workqueue
+ * checks list_empty(), and a "normal" queue_work() can't use
+ * a dead CPU.
+ */
+ while (flush_cpu_workqueue(cwq))
+ ;
- while (unlikely(cwq->thread != NULL))
- cpu_relax();
- /*
- * Wait until cwq->thread unlocks cwq->lock,
- * it won't touch *cwq after that.
- */
- smp_rmb();
- spin_unlock_wait(&cwq->lock);
- }
+ kthread_stop(cwq->thread);
+ cwq->thread = NULL;
}
/**