diff options
author | Robert Love <robert.w.love@intel.com> | 2013-03-25 11:00:27 -0700 |
---|---|---|
committer | Robert Love <robert.w.love@intel.com> | 2013-03-25 15:55:56 -0700 |
commit | f9c4358edb285cead00a0d6cf0644c84ee773026 (patch) | |
tree | 7ba4fd79620f04e1af563d573b57b8b94a28cd43 | |
parent | 01bdcb626f80838f55ec2af993daef50b9c59c49 (diff) |
fcoe: Fix deadlock between create and destroy paths
We can deadlock (s_active and fcoe_config_mutex) if a
port is being destroyed at the same time one is being created.
[ 4200.503113] ======================================================
[ 4200.503114] [ INFO: possible circular locking dependency detected ]
[ 4200.503116] 3.8.0-rc5+ #8 Not tainted
[ 4200.503117] -------------------------------------------------------
[ 4200.503118] kworker/3:2/2492 is trying to acquire lock:
[ 4200.503119] (s_active#292){++++.+}, at: [<ffffffff8122d20b>] sysfs_addrm_finish+0x3b/0x70
[ 4200.503127]
but task is already holding lock:
[ 4200.503128] (fcoe_config_mutex){+.+.+.}, at: [<ffffffffa02f3338>] fcoe_destroy_work+0xe8/0x120 [fcoe]
[ 4200.503133]
which lock already depends on the new lock.
[ 4200.503135]
the existing dependency chain (in reverse order) is:
[ 4200.503136]
-> #1 (fcoe_config_mutex){+.+.+.}:
[ 4200.503139] [<ffffffff810c7711>] lock_acquire+0xa1/0x140
[ 4200.503143] [<ffffffff816ca7be>] mutex_lock_nested+0x6e/0x360
[ 4200.503146] [<ffffffffa02f11bd>] fcoe_enable+0x1d/0xb0 [fcoe]
[ 4200.503148] [<ffffffffa02f127d>] fcoe_ctlr_enabled+0x2d/0x50 [fcoe]
[ 4200.503151] [<ffffffffa02ffbe8>] store_ctlr_enabled+0x38/0x90 [libfcoe]
[ 4200.503154] [<ffffffff81424878>] dev_attr_store+0x18/0x30
[ 4200.503157] [<ffffffff8122b750>] sysfs_write_file+0xe0/0x150
[ 4200.503160] [<ffffffff811b334c>] vfs_write+0xac/0x180
[ 4200.503162] [<ffffffff811b3692>] sys_write+0x52/0xa0
[ 4200.503164] [<ffffffff816d7159>] system_call_fastpath+0x16/0x1b
[ 4200.503167]
-> #0 (s_active#292){++++.+}:
[ 4200.503170] [<ffffffff810c680f>] __lock_acquire+0x135f/0x1c90
[ 4200.503172] [<ffffffff810c7711>] lock_acquire+0xa1/0x140
[ 4200.503174] [<ffffffff8122c626>] sysfs_deactivate+0x116/0x160
[ 4200.503176] [<ffffffff8122d20b>] sysfs_addrm_finish+0x3b/0x70
[ 4200.503178] [<ffffffff8122b2eb>] sysfs_hash_and_remove+0x5b/0xb0
[ 4200.503180] [<ffffffff8122f3d1>] sysfs_remove_group+0x61/0x100
[ 4200.503183] [<ffffffff814251eb>] device_remove_groups+0x3b/0x60
[ 4200.503185] [<ffffffff81425534>] device_remove_attrs+0x44/0x80
[ 4200.503187] [<ffffffff81425e97>] device_del+0x127/0x1c0
[ 4200.503189] [<ffffffff81425f52>] device_unregister+0x22/0x60
[ 4200.503191] [<ffffffffa0300970>] fcoe_ctlr_device_delete+0xe0/0xf0 [libfcoe]
[ 4200.503194] [<ffffffffa02f1b5c>] fcoe_interface_cleanup+0x6c/0xa0 [fcoe]
[ 4200.503196] [<ffffffffa02f3355>] fcoe_destroy_work+0x105/0x120 [fcoe]
[ 4200.503198] [<ffffffff8107ee91>] process_one_work+0x1a1/0x580
[ 4200.503203] [<ffffffff81080c6e>] worker_thread+0x15e/0x440
[ 4200.503205] [<ffffffff8108715a>] kthread+0xea/0xf0
[ 4200.503207] [<ffffffff816d70ac>] ret_from_fork+0x7c/0xb0
[ 4200.503209]
other info that might help us debug this:
[ 4200.503211] Possible unsafe locking scenario:
[ 4200.503212] CPU0 CPU1
[ 4200.503213] ---- ----
[ 4200.503214] lock(fcoe_config_mutex);
[ 4200.503215] lock(s_active#292);
[ 4200.503218] lock(fcoe_config_mutex);
[ 4200.503219] lock(s_active#292);
[ 4200.503221]
*** DEADLOCK ***
[ 4200.503223] 3 locks held by kworker/3:2/2492:
[ 4200.503224] #0: (fcoe){.+.+.+}, at: [<ffffffff8107ee2b>] process_one_work+0x13b/0x580
[ 4200.503228] #1: ((&port->destroy_work)){+.+.+.}, at: [<ffffffff8107ee2b>] process_one_work+0x13b/0x580
[ 4200.503232] #2: (fcoe_config_mutex){+.+.+.}, at: [<ffffffffa02f3338>] fcoe_destroy_work+0xe8/0x120 [fcoe]
[ 4200.503236]
stack backtrace:
[ 4200.503238] Pid: 2492, comm: kworker/3:2 Not tainted 3.8.0-rc5+ #8
[ 4200.503240] Call Trace:
[ 4200.503243] [<ffffffff816c2f09>] print_circular_bug+0x1fb/0x20c
[ 4200.503246] [<ffffffff810c680f>] __lock_acquire+0x135f/0x1c90
[ 4200.503248] [<ffffffff810c463a>] ? debug_check_no_locks_freed+0x9a/0x180
[ 4200.503250] [<ffffffff810c7711>] lock_acquire+0xa1/0x140
[ 4200.503253] [<ffffffff8122d20b>] ? sysfs_addrm_finish+0x3b/0x70
[ 4200.503255] [<ffffffff8122c626>] sysfs_deactivate+0x116/0x160
[ 4200.503258] [<ffffffff8122d20b>] ? sysfs_addrm_finish+0x3b/0x70
[ 4200.503260] [<ffffffff8122d20b>] sysfs_addrm_finish+0x3b/0x70
[ 4200.503262] [<ffffffff8122b2eb>] sysfs_hash_and_remove+0x5b/0xb0
[ 4200.503265] [<ffffffff8122f3d1>] sysfs_remove_group+0x61/0x100
[ 4200.503273] [<ffffffff814251eb>] device_remove_groups+0x3b/0x60
[ 4200.503275] [<ffffffff81425534>] device_remove_attrs+0x44/0x80
[ 4200.503277] [<ffffffff81425e97>] device_del+0x127/0x1c0
[ 4200.503279] [<ffffffff81425f52>] device_unregister+0x22/0x60
[ 4200.503282] [<ffffffffa0300970>] fcoe_ctlr_device_delete+0xe0/0xf0 [libfcoe]
[ 4200.503285] [<ffffffffa02f1b5c>] fcoe_interface_cleanup+0x6c/0xa0 [fcoe]
[ 4200.503287] [<ffffffffa02f3355>] fcoe_destroy_work+0x105/0x120 [fcoe]
[ 4200.503290] [<ffffffff8107ee91>] process_one_work+0x1a1/0x580
[ 4200.503292] [<ffffffff8107ee2b>] ? process_one_work+0x13b/0x580
[ 4200.503295] [<ffffffffa02f3250>] ? fcoe_if_destroy+0x230/0x230 [fcoe]
[ 4200.503297] [<ffffffff81080c6e>] worker_thread+0x15e/0x440
[ 4200.503299] [<ffffffff81080b10>] ? busy_worker_rebind_fn+0x100/0x100
[ 4200.503301] [<ffffffff8108715a>] kthread+0xea/0xf0
[ 4200.503304] [<ffffffff81087070>] ? kthread_create_on_node+0x160/0x160
[ 4200.503306] [<ffffffff816d70ac>] ret_from_fork+0x7c/0xb0
[ 4200.503308] [<ffffffff81087070>] ? kthread_create_on_node+0x160/0x160
Signed-off-by: Robert Love <robert.w.love@intel.com>
Tested-by: Jack Morgan <jack.morgan@intel.com>
-rw-r--r-- | drivers/scsi/fcoe/fcoe.c | 15 |
1 files changed, 11 insertions, 4 deletions
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index b5d92fc93c7..9bfdc9a3f89 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -490,7 +490,6 @@ static void fcoe_interface_cleanup(struct fcoe_interface *fcoe) { struct net_device *netdev = fcoe->netdev; struct fcoe_ctlr *fip = fcoe_to_ctlr(fcoe); - struct fcoe_ctlr_device *ctlr_dev = fcoe_ctlr_to_ctlr_dev(fip); rtnl_lock(); if (!fcoe->removed) @@ -501,7 +500,6 @@ static void fcoe_interface_cleanup(struct fcoe_interface *fcoe) /* tear-down the FCoE controller */ fcoe_ctlr_destroy(fip); scsi_host_put(fip->lp->host); - fcoe_ctlr_device_delete(ctlr_dev); dev_put(netdev); module_put(THIS_MODULE); } @@ -2194,6 +2192,8 @@ out_nodev: */ static void fcoe_destroy_work(struct work_struct *work) { + struct fcoe_ctlr_device *cdev; + struct fcoe_ctlr *ctlr; struct fcoe_port *port; struct fcoe_interface *fcoe; struct Scsi_Host *shost; @@ -2224,10 +2224,15 @@ static void fcoe_destroy_work(struct work_struct *work) mutex_lock(&fcoe_config_mutex); fcoe = port->priv; + ctlr = fcoe_to_ctlr(fcoe); + cdev = fcoe_ctlr_to_ctlr_dev(ctlr); + fcoe_if_destroy(port->lport); fcoe_interface_cleanup(fcoe); mutex_unlock(&fcoe_config_mutex); + + fcoe_ctlr_device_delete(cdev); } /** @@ -2335,7 +2340,9 @@ static int _fcoe_create(struct net_device *netdev, enum fip_state fip_mode, rc = -EIO; rtnl_unlock(); fcoe_interface_cleanup(fcoe); - goto out_nortnl; + mutex_unlock(&fcoe_config_mutex); + fcoe_ctlr_device_delete(ctlr_dev); + goto out; } /* Make this the "master" N_Port */ @@ -2375,8 +2382,8 @@ static int _fcoe_create(struct net_device *netdev, enum fip_state fip_mode, out_nodev: rtnl_unlock(); -out_nortnl: mutex_unlock(&fcoe_config_mutex); +out: return rc; } |