From d3251859168b0b12841e1b90d6d768ab478dc23d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 10 May 2013 11:10:17 -0700 Subject: workqueue: workqueue_congested() shouldn't translate WORK_CPU_UNBOUND into node number df2d5ae499 ("workqueue: map an unbound workqueues to multiple per-node pool_workqueues") made unbound workqueues to map to multiple per-node pool_workqueues and accordingly updated workqueue_contested() so that, for unbound workqueues, it maps the specified @cpu to the NUMA node number to obtain the matching pool_workqueue to query the congested state. Before this change, workqueue_congested() ignored @cpu for unbound workqueues as there was only one pool_workqueue and some users (fscache) called it with WORK_CPU_UNBOUND. After the commit, this causes the following oops as WORK_CPU_UNBOUND gets translated to garbage by cpu_to_node(). BUG: unable to handle kernel paging request at ffff8803598d98b8 IP: [] unbound_pwq_by_node+0xa1/0xfa PGD 2421067 PUD 0 Oops: 0000 [#1] SMP CPU: 1 PID: 2689 Comm: cat Tainted: GF 3.9.0-fsdevel+ #4 task: ffff88003d801040 ti: ffff880025806000 task.ti: ffff880025806000 RIP: 0010:[] [] unbound_pwq_by_node+0xa1/0xfa RSP: 0018:ffff880025807ad8 EFLAGS: 00010202 RAX: 0000000000000001 RBX: ffff8800388a2400 RCX: 0000000000000003 RDX: ffff880025807fd8 RSI: ffffffff81a31420 RDI: ffff88003d8016e0 RBP: ffff880025807ae8 R08: ffff88003d801730 R09: ffffffffa00b4898 R10: ffffffff81044217 R11: ffff88003d801040 R12: 0000000064206e97 R13: ffff880036059d98 R14: ffff880038cc8080 R15: ffff880038cc82d0 FS: 00007f21afd9c740(0000) GS:ffff88003d100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffff8803598d98b8 CR3: 000000003df49000 CR4: 00000000000007e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffff8800388a2400 0000000000000002 ffff880025807b18 ffffffff810442ce ffffffff81044217 ffff880000000002 ffff8800371b4080 ffff88003d112ec0 ffff880025807b38 ffffffffa00810b0 ffff880036059d88 ffff880036059be8 Call Trace: [] workqueue_congested+0xb7/0x12c [] fscache_enqueue_object+0xb2/0xe8 [fscache] [] __fscache_acquire_cookie+0x3b9/0x56c [fscache] [] nfs_fscache_set_inode_cookie+0xee/0x132 [nfs] [] do_open+0x9/0xd [nfs] [] do_dentry_open+0x175/0x24b [] finish_open+0x41/0x51 Fix it by using smp_processor_id() if @cpu is WORK_CPU_UNBOUND. Signed-off-by: Tejun Heo Reported-by: David Howells Tested-and-Acked-by: David Howells --- kernel/workqueue.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4aa9f5bc6b2..1ae602809ef 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4311,6 +4311,12 @@ bool current_is_workqueue_rescuer(void) * no synchronization around this function and the test result is * unreliable and only useful as advisory hints or for debugging. * + * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU. + * Note that both per-cpu and unbound workqueues may be associated with + * multiple pool_workqueues which have separate congested states. A + * workqueue being congested on one CPU doesn't mean the workqueue is also + * contested on other CPUs / NUMA nodes. + * * RETURNS: * %true if congested, %false otherwise. */ @@ -4321,6 +4327,9 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) rcu_read_lock_sched(); + if (cpu == WORK_CPU_UNBOUND) + cpu = smp_processor_id(); + if (!(wq->flags & WQ_UNBOUND)) pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); else -- cgit v1.2.3-70-g09d2 From 8f174b1175a10903ade40f36eb6c896412877ca0 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 1 May 2013 00:07:00 +0900 Subject: workqueue: correct handling of the pool spin_lock When we fail to mutex_trylock(), we release the pool spin_lock and do mutex_lock(). After that, we should regrab the pool spin_lock, but, regrabbing is missed in current code. So correct it. Cc: Lai Jiangshan Signed-off-by: Joonsoo Kim Signed-off-by: Tejun Heo --- kernel/workqueue.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1ae602809ef..286847b9022 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2059,6 +2059,7 @@ static bool manage_workers(struct worker *worker) if (unlikely(!mutex_trylock(&pool->manager_mutex))) { spin_unlock_irq(&pool->lock); mutex_lock(&pool->manager_mutex); + spin_lock_irq(&pool->lock); ret = true; } -- cgit v1.2.3-70-g09d2 From ad7b1f841f8a54c6d61ff181451f55b68175e15a Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Mon, 6 May 2013 17:44:55 -0400 Subject: workqueue: Make schedule_work() available again to non GPL modules Commit 8425e3d5bdbe ("workqueue: inline trivial wrappers") changed schedule_work() and schedule_delayed_work() to inline wrappers, but these rely on some symbols that are EXPORT_SYMBOL_GPL, while the original functions were EXPORT_SYMBOL. This has the effect of changing the licensing requirement for these functions and making them unavailable to non GPL modules. Make them available again by removing the restriction on the required symbols. Signed-off-by: Marc Dionne Signed-off-by: Tejun Heo --- kernel/workqueue.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 286847b9022..02916f42138 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -296,7 +296,7 @@ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER); static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; struct workqueue_struct *system_wq __read_mostly; -EXPORT_SYMBOL_GPL(system_wq); +EXPORT_SYMBOL(system_wq); struct workqueue_struct *system_highpri_wq __read_mostly; EXPORT_SYMBOL_GPL(system_highpri_wq); struct workqueue_struct *system_long_wq __read_mostly; @@ -1411,7 +1411,7 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, local_irq_restore(flags); return ret; } -EXPORT_SYMBOL_GPL(queue_work_on); +EXPORT_SYMBOL(queue_work_on); void delayed_work_timer_fn(unsigned long __data) { @@ -1485,7 +1485,7 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, local_irq_restore(flags); return ret; } -EXPORT_SYMBOL_GPL(queue_delayed_work_on); +EXPORT_SYMBOL(queue_delayed_work_on); /** * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU -- cgit v1.2.3-70-g09d2 From 1be0c25da56e860992af972a60321563ca2cfcd1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 15 May 2013 14:24:24 -0700 Subject: workqueue: don't perform NUMA-aware allocations on offline nodes in wq_numa_init() wq_numa_init() builds per-node cpumasks which are later used to make unbound workqueues NUMA-aware. The cpumasks are allocated using alloc_cpumask_var_node() for all possible nodes. Unfortunately, on machines with off-line nodes, this leads to NUMA-aware allocations on existing bug offline nodes, which in turn triggers BUG in the memory allocation code. Fix it by using NUMA_NO_NODE for cpumask allocations for offline nodes. kernel BUG at include/linux/gfp.h:323! invalid opcode: 0000 [#1] SMP Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.9.0+ #1 Hardware name: ProLiant BL465c G7, BIOS A19 12/10/2011 task: ffff880234608000 ti: ffff880234602000 task.ti: ffff880234602000 RIP: 0010:[] [] new_slab+0x2ad/0x340 RSP: 0000:ffff880234603bf8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff880237404b40 RCX: 00000000000000d0 RDX: 0000000000000001 RSI: 0000000000000003 RDI: 00000000002052d0 RBP: ffff880234603c28 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000001 R11: ffffffff812e3aa8 R12: 0000000000000001 R13: ffff8802378161c0 R14: 0000000000030027 R15: 00000000000040d0 FS: 0000000000000000(0000) GS:ffff880237800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffff88043fdff000 CR3: 00000000018d5000 CR4: 00000000000007f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffff880234603c28 0000000000000001 00000000000000d0 ffff8802378161c0 ffff880237404b40 ffff880237404b40 ffff880234603d28 ffffffff815edba1 ffff880237816140 0000000000000000 ffff88023740e1c0 Call Trace: [] __slab_alloc+0x330/0x4f2 [] kmem_cache_alloc_node_trace+0xa5/0x200 [] alloc_cpumask_var_node+0x28/0x90 [] wq_numa_init+0x10d/0x1be [] init_workqueues+0x64/0x341 [] do_one_initcall+0xea/0x1a0 [] kernel_init_freeable+0xb7/0x1ec [] kernel_init+0xe/0xf0 [] ret_from_fork+0x7c/0xb0 Code: 45 84 ac 00 00 00 f0 41 80 4d 00 40 e9 f6 fe ff ff 66 0f 1f 84 00 00 00 00 00 e8 eb 4b ff ff 49 89 c5 e9 05 fe ff ff <0f> 0b 4c 8b 73 38 44 89 ff 81 cf 00 00 20 00 4c 89 f6 48 c1 ee Signed-off-by: Tejun Heo Reported-and-Tested-by: Lingzhu Xiang --- kernel/workqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 02916f42138..ee8e29a2320 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4905,7 +4905,8 @@ static void __init wq_numa_init(void) BUG_ON(!tbl); for_each_node(node) - BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL, node)); + BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL, + node_online(node) ? node : NUMA_NO_NODE)); for_each_possible_cpu(cpu) { node = cpu_to_node(cpu); -- cgit v1.2.3-70-g09d2