From aedf349773e5877d716a89368d512b9baa3e8c7b Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 10 Apr 2006 10:14:05 -0400 Subject: [SCSI] FC transport: fixes for workq deadlocks As previously reported via Michael Reed, the FC transport took a hit in 2.6.15 (perhaps a little earlier) when we solved a recursion error. There are 2 deadlocks occurring: - With scan and the delete items sharing the same workq, flushing the workq for the delete code was getting it stalled behind a very long running scan code path. - There's a deadlock where scsi_remove_target() has to sit behind scsi_scan_target() due to contention over the scan_lock(). This patch resolves the 1st deadlock and significantly reduces the odds of the second. So far, we have only replicated the 2nd deadlock on a highly-parallel SMP system. More on the 2nd deadlock in a following email. This patch reworks the transport to: - Only use the scsi host workq for scanning - Use 2 other workq's internally. One for deletions, the other for scheduled deletions. Originally, we tried this with a single workq, but the occassional flushes of the scheduled queues was hitting the second deadlock with a slightly higher frequency. In the future, we'll look at the LLDD's and the transport to see if we can get rid of this extra overhead. - When moving to the other workq's we tightened up some object states and some lock handling. - Properly syncs adds/deletes - minor code cleanups - directly reference fc_host_attrs, rather than through attribute macros - flush the right workq on delayed work cancel failures. Large kudos to Michael Reed who has been working this issue for the last month. Signed-off-by: James Bottomley --- include/scsi/scsi_transport_fc.h | 41 +++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) (limited to 'include/scsi') diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index cf3fec8be1e..5626225bd3a 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -202,12 +202,19 @@ struct fc_rport { /* aka fc_starget_attrs */ /* internal data */ unsigned int channel; u32 number; + u8 flags; struct list_head peers; struct device dev; struct work_struct dev_loss_work; struct work_struct scan_work; + struct work_struct stgt_delete_work; + struct work_struct rport_delete_work; } __attribute__((aligned(sizeof(unsigned long)))); +/* bit field values for struct fc_rport "flags" field: */ +#define FC_RPORT_DEVLOSS_PENDING 0x01 +#define FC_RPORT_SCAN_PENDING 0x02 + #define dev_to_rport(d) \ container_of(d, struct fc_rport, dev) #define transport_class_to_rport(classdev) \ @@ -327,13 +334,16 @@ struct fc_host_attrs { struct list_head rport_bindings; u32 next_rport_number; u32 next_target_id; - u8 flags; - struct work_struct rport_del_work; -}; -/* values for struct fc_host_attrs "flags" field: */ -#define FC_SHOST_RPORT_DEL_SCHEDULED 0x01 + /* work queues for rport state manipulation */ + char work_q_name[KOBJ_NAME_LEN]; + struct workqueue_struct *work_q; + char devloss_work_q_name[KOBJ_NAME_LEN]; + struct workqueue_struct *devloss_work_q; +}; +#define shost_to_fc_host(x) \ + ((struct fc_host_attrs *)(x)->shost_data) #define fc_host_node_name(x) \ (((struct fc_host_attrs *)(x)->shost_data)->node_name) @@ -375,10 +385,14 @@ struct fc_host_attrs { (((struct fc_host_attrs *)(x)->shost_data)->next_rport_number) #define fc_host_next_target_id(x) \ (((struct fc_host_attrs *)(x)->shost_data)->next_target_id) -#define fc_host_flags(x) \ - (((struct fc_host_attrs *)(x)->shost_data)->flags) -#define fc_host_rport_del_work(x) \ - (((struct fc_host_attrs *)(x)->shost_data)->rport_del_work) +#define fc_host_work_q_name(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->work_q_name) +#define fc_host_work_q(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->work_q) +#define fc_host_devloss_work_q_name(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q_name) +#define fc_host_devloss_work_q(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q) /* The functions by which the transport class and the driver communicate */ @@ -461,10 +475,15 @@ fc_remote_port_chkready(struct fc_rport *rport) switch (rport->port_state) { case FC_PORTSTATE_ONLINE: - result = 0; + if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) + result = 0; + else if (rport->flags & FC_RPORT_DEVLOSS_PENDING) + result = DID_IMM_RETRY << 16; + else + result = DID_NO_CONNECT << 16; break; case FC_PORTSTATE_BLOCKED: - result = DID_BUS_BUSY << 16; + result = DID_IMM_RETRY << 16; break; default: result = DID_NO_CONNECT << 16; -- cgit v1.2.3-70-g09d2