From 24926dadc41cc566e974022b0e66231b82c6375f Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Thu, 1 Sep 2011 06:11:17 -0700 Subject: [SCSI] libsas: fix failure to revalidate domain for anything but the first expander child. In an enclosure model where there are chaining expanders to a large body of storage, it was discovered that libsas, responding to a broadcast event change, would only revalidate the domain of first child expander in the list. The issue is that the pointer value to the discovered source device was used to break out of the loop, rather than the content of the pointer. This still remains non-compliant as the revalidate domain code is supposed to loop through all child expanders, and not stop at the first one it finds that reports a change count. However, the design of this routine does not allow multiple device discoveries and that would be a more complicated set of patches reserved for another day. We are fixing the glaring bug rather than refactoring the code. Signed-off-by: Mark Salyzyn Cc: stable@kernel.org Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_expander.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/scsi/libsas/sas_expander.c') diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index f84084bba2f..c9e3dc024bc 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1721,7 +1721,7 @@ static int sas_find_bcast_dev(struct domain_device *dev, list_for_each_entry(ch, &ex->children, siblings) { if (ch->dev_type == EDGE_DEV || ch->dev_type == FANOUT_DEV) { res = sas_find_bcast_dev(ch, src_dev); - if (src_dev) + if (*src_dev) return res; } } -- cgit v1.2.3-70-g09d2 From a73914c35b05d80f8ce78288e10056c91090b666 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Thu, 22 Sep 2011 08:32:23 -0700 Subject: [SCSI] libsas: fix panic when single phy is disabled on a wide port When a wide port is being utilized to a target, if one disables only one of the phys, we get an OS crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000238 IP: [] mutex_lock+0x21/0x50 PGD 4103f5067 PUD 41dba9067 PMD 0 Oops: 0002 [#1] SMP last sysfs file: /sys/bus/pci/slots/5/address CPU 0 Modules linked in: pm8001(U) ses enclosure fuse nfsd exportfs autofs4 ipmi_devintf ipmi_si ipmi_msghandler nfs lockd fscache nfs_acl auth_rpcgss 8021q fcoe libfcoe garp libfc scsi_transport_fc stp scsi_tgt llc sunrpc cpufreq_ondemand acpi_cpufreq freq_table ipv6 sr_mod cdrom dm_mirror dm_region_hash dm_log uinput sg i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support e1000e mlx4_ib ib_mad ib_core mlx4_en mlx4_core ext3 jbd mbcache sd_mod crc_t10dif usb_storage ata_generic pata_acpi ata_piix libsas(U) scsi_transport_sas dm_mod [last unloaded: pm8001] Modules linked in: pm8001(U) ses enclosure fuse nfsd exportfs autofs4 ipmi_devintf ipmi_si ipmi_msghandler nfs lockd fscache nfs_acl auth_rpcgss 8021q fcoe libfcoe garp libfc scsi_transport_fc stp scsi_tgt llc sunrpc cpufreq_ondemand acpi_cpufreq freq_table ipv6 sr_mod cdrom dm_mirror dm_region_hash dm_log uinput sg i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support e1000e mlx4_ib ib_mad ib_core mlx4_en mlx4_core ext3 jbd mbcache sd_mod crc_t10dif usb_storage ata_generic pata_acpi ata_piix libsas(U) scsi_transport_sas dm_mod [last unloaded: pm8001] Pid: 5146, comm: scsi_wq_5 Not tainted 2.6.32-71.29.1.el6.lustre.7.x86_64 #1 Storage Server RIP: 0010:[] [] mutex_lock+0x21/0x50 RSP: 0018:ffff8803e4e33d30 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000238 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff8803e664c800 RDI: 0000000000000238 RBP: ffff8803e4e33d40 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000238 R14: ffff88041acb7200 R15: ffff88041c51ada0 FS: 0000000000000000(0000) GS:ffff880028200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 0000000000000238 CR3: 0000000410143000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process scsi_wq_5 (pid: 5146, threadinfo ffff8803e4e32000, task ffff8803e4e294a0) Stack: ffff8803e664c800 0000000000000000 ffff8803e4e33d70 ffffffffa001f06e <0> ffff8803e4e33d60 ffff88041c51ada0 ffff88041acb7200 ffff88041bc0aa00 <0> ffff8803e4e33d90 ffffffffa0032b6c 0000000000000014 ffff88041acb7200 Call Trace: [] sas_port_delete_phy+0x2e/0xa0 [scsi_transport_sas] [] sas_unregister_devs_sas_addr+0xac/0xe0 [libsas] [] sas_ex_revalidate_domain+0x204/0x330 [libsas] [] ? sas_revalidate_domain+0x0/0x90 [libsas] [] sas_revalidate_domain+0x65/0x90 [libsas] [] worker_thread+0x170/0x2a0 [] ? autoremove_wake_function+0x0/0x40 [] ? worker_thread+0x0/0x2a0 [] kthread+0x96/0xa0 [] child_rip+0xa/0x20 [] ? kthread+0x0/0xa0 [] ? child_rip+0x0/0x20 Code: ff ff 85 c0 75 ed eb d6 66 90 55 48 89 e5 48 83 ec 10 48 89 1c 24 4c 89 64 24 08 0f 1f 44 00 00 48 89 fb e8 92 f4 ff ff 48 89 df ff 0f 79 05 e8 25 00 00 00 65 48 8b 04 25 08 cc 00 00 48 2d RIP [] mutex_lock+0x21/0x50 RSP CR2: 0000000000000238 The following patch is admittedly a band-aid, and does not solve the root cause, but it still is a good candidate for hardening as a pointer check before reference. Signed-off-by: Mark Salyzyn Tested-by: Jack Wang Cc: stable@kernel.org Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_expander.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/scsi/libsas/sas_expander.c') diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index c9e3dc024bc..16ad97df5ba 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1769,10 +1769,12 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent, sas_disable_routing(parent, phy->attached_sas_addr); } memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); - sas_port_delete_phy(phy->port, phy->phy); - if (phy->port->num_phys == 0) - sas_port_delete(phy->port); - phy->port = NULL; + if (phy->port) { + sas_port_delete_phy(phy->port, phy->phy); + if (phy->port->num_phys == 0) + sas_port_delete(phy->port); + phy->port = NULL; + } } static int sas_discover_bfs_by_root_level(struct domain_device *root, -- cgit v1.2.3-70-g09d2