From fe4a36cf652031d2744a536ba5121032840380cb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 30 Oct 2006 15:18:56 -0800 Subject: [SCSI] aic94xx: handle REQ_TASK_ABORT This patch straightens out the code that distinguishes the various escb opcodes in escb_tasklet_complete so that they can be handled correctly. It also provides all the necessary code to create a workqueue item that tells libsas to abort a sas_task. Signed-off-by: Darrick J. Wong Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx_scb.c | 83 ++++++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 17 deletions(-) (limited to 'drivers/scsi/aic94xx') diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c index 7ee49b51b72..1911c5d1787 100644 --- a/drivers/scsi/aic94xx/aic94xx_scb.c +++ b/drivers/scsi/aic94xx/aic94xx_scb.c @@ -25,6 +25,7 @@ */ #include +#include #include "aic94xx.h" #include "aic94xx_reg.h" @@ -342,6 +343,18 @@ void asd_invalidate_edb(struct asd_ascb *ascb, int edb_id) } } +/* start up the ABORT TASK tmf... */ +static void task_kill_later(struct asd_ascb *ascb) +{ + struct asd_ha_struct *asd_ha = ascb->ha; + struct sas_ha_struct *sas_ha = &asd_ha->sas_ha; + struct Scsi_Host *shost = sas_ha->core.shost; + struct sas_task *task = ascb->uldd_task; + + INIT_WORK(&task->abort_work, (void (*)(void *))sas_task_abort, task); + queue_work(shost->work_q, &task->abort_work); +} + static void escb_tasklet_complete(struct asd_ascb *ascb, struct done_list_struct *dl) { @@ -368,6 +381,58 @@ static void escb_tasklet_complete(struct asd_ascb *ascb, ascb->scb->header.opcode); } + /* Catch these before we mask off the sb_opcode bits */ + switch (sb_opcode) { + case REQ_TASK_ABORT: { + struct asd_ascb *a, *b; + u16 tc_abort; + + tc_abort = *((u16*)(&dl->status_block[1])); + tc_abort = le16_to_cpu(tc_abort); + + ASD_DPRINTK("%s: REQ_TASK_ABORT, reason=0x%X\n", + __FUNCTION__, dl->status_block[3]); + + /* Find the pending task and abort it. */ + list_for_each_entry_safe(a, b, &asd_ha->seq.pend_q, list) + if (a->tc_index == tc_abort) { + task_kill_later(a); + break; + } + goto out; + } + case REQ_DEVICE_RESET: { + struct asd_ascb *a, *b; + u16 conn_handle; + + conn_handle = *((u16*)(&dl->status_block[1])); + conn_handle = le16_to_cpu(conn_handle); + + ASD_DPRINTK("%s: REQ_DEVICE_RESET, reason=0x%X\n", __FUNCTION__, + dl->status_block[3]); + + /* Kill all pending tasks and reset the device */ + list_for_each_entry_safe(a, b, &asd_ha->seq.pend_q, list) { + struct sas_task *task = a->uldd_task; + struct domain_device *dev = task->dev; + u16 x; + + x = *((u16*)(&dev->lldd_dev)); + if (x == conn_handle) + task_kill_later(a); + } + + /* FIXME: Reset device port (huh?) */ + goto out; + } + case SIGNAL_NCQ_ERROR: + ASD_DPRINTK("%s: SIGNAL_NCQ_ERROR\n", __FUNCTION__); + goto out; + case CLEAR_NCQ_ERROR: + ASD_DPRINTK("%s: CLEAR_NCQ_ERROR\n", __FUNCTION__); + goto out; + } + sb_opcode &= ~DL_PHY_MASK; switch (sb_opcode) { @@ -397,22 +462,6 @@ static void escb_tasklet_complete(struct asd_ascb *ascb, sas_phy_disconnected(sas_phy); sas_ha->notify_port_event(sas_phy, PORTE_TIMER_EVENT); break; - case REQ_TASK_ABORT: - ASD_DPRINTK("%s: phy%d: REQ_TASK_ABORT\n", __FUNCTION__, - phy_id); - break; - case REQ_DEVICE_RESET: - ASD_DPRINTK("%s: phy%d: REQ_DEVICE_RESET\n", __FUNCTION__, - phy_id); - break; - case SIGNAL_NCQ_ERROR: - ASD_DPRINTK("%s: phy%d: SIGNAL_NCQ_ERROR\n", __FUNCTION__, - phy_id); - break; - case CLEAR_NCQ_ERROR: - ASD_DPRINTK("%s: phy%d: CLEAR_NCQ_ERROR\n", __FUNCTION__, - phy_id); - break; default: ASD_DPRINTK("%s: phy%d: unknown event:0x%x\n", __FUNCTION__, phy_id, sb_opcode); @@ -432,7 +481,7 @@ static void escb_tasklet_complete(struct asd_ascb *ascb, break; } - +out: asd_invalidate_edb(ascb, edb); } -- cgit v1.2.3-70-g09d2 From dea22214790d1306f3a3444db13d2c726037b189 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 7 Nov 2006 17:28:55 -0800 Subject: [PATCH] aic94xx: handle REQ_DEVICE_RESET This patch implements a REQ_DEVICE_RESET handler for the aic94xx driver. Like the earlier REQ_TASK_ABORT patch, this patch defers the device reset to the Scsi_Host's workqueue, which has the added benefit of ensuring that the device reset does not happen at the same time that the abort tmfs are being processed. After the phy reset, the busted drive should go away and be re-detected later, which is indeed what I've seen on both a x260 and a x206m. Signed-off-by: Darrick J. Wong Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx_scb.c | 51 ++++++++++++++++++++++++++++++++----- drivers/scsi/libsas/sas_init.c | 2 +- drivers/scsi/libsas/sas_scsi_host.c | 1 + include/scsi/libsas.h | 1 + include/scsi/scsi_transport_sas.h | 2 ++ 5 files changed, 49 insertions(+), 8 deletions(-) (limited to 'drivers/scsi/aic94xx') diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c index 1911c5d1787..a014418d670 100644 --- a/drivers/scsi/aic94xx/aic94xx_scb.c +++ b/drivers/scsi/aic94xx/aic94xx_scb.c @@ -343,6 +343,27 @@ void asd_invalidate_edb(struct asd_ascb *ascb, int edb_id) } } +/* hard reset a phy later */ +static void do_phy_reset_later(void *data) +{ + struct sas_phy *sas_phy = data; + int error; + + ASD_DPRINTK("%s: About to hard reset phy %d\n", __FUNCTION__, + sas_phy->identify.phy_identifier); + /* Reset device port */ + error = sas_phy_reset(sas_phy, 1); + if (error) + ASD_DPRINTK("%s: Hard reset of phy %d failed (%d).\n", + __FUNCTION__, sas_phy->identify.phy_identifier, error); +} + +static void phy_reset_later(struct sas_phy *sas_phy, struct Scsi_Host *shost) +{ + INIT_WORK(&sas_phy->reset_work, do_phy_reset_later, sas_phy); + queue_work(shost->work_q, &sas_phy->reset_work); +} + /* start up the ABORT TASK tmf... */ static void task_kill_later(struct asd_ascb *ascb) { @@ -402,7 +423,9 @@ static void escb_tasklet_complete(struct asd_ascb *ascb, goto out; } case REQ_DEVICE_RESET: { - struct asd_ascb *a, *b; + struct Scsi_Host *shost = sas_ha->core.shost; + struct sas_phy *dev_phy; + struct asd_ascb *a; u16 conn_handle; conn_handle = *((u16*)(&dl->status_block[1])); @@ -412,17 +435,31 @@ static void escb_tasklet_complete(struct asd_ascb *ascb, dl->status_block[3]); /* Kill all pending tasks and reset the device */ - list_for_each_entry_safe(a, b, &asd_ha->seq.pend_q, list) { - struct sas_task *task = a->uldd_task; - struct domain_device *dev = task->dev; + dev_phy = NULL; + list_for_each_entry(a, &asd_ha->seq.pend_q, list) { + struct sas_task *task; + struct domain_device *dev; u16 x; - x = *((u16*)(&dev->lldd_dev)); - if (x == conn_handle) + task = a->uldd_task; + if (!task) + continue; + dev = task->dev; + + x = (u16)dev->lldd_dev; + if (x == conn_handle) { + dev_phy = dev->port->phy; task_kill_later(a); + } } - /* FIXME: Reset device port (huh?) */ + /* Reset device port */ + if (!dev_phy) { + ASD_DPRINTK("%s: No pending commands; can't reset.\n", + __FUNCTION__); + goto out; + } + phy_reset_later(dev_phy, shost); goto out; } case SIGNAL_NCQ_ERROR: diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c index a2b479a6590..0fb347b4b1a 100644 --- a/drivers/scsi/libsas/sas_init.c +++ b/drivers/scsi/libsas/sas_init.c @@ -144,7 +144,7 @@ static int sas_get_linkerrors(struct sas_phy *phy) return sas_smp_get_phy_events(phy); } -static int sas_phy_reset(struct sas_phy *phy, int hard_reset) +int sas_phy_reset(struct sas_phy *phy, int hard_reset) { int ret; enum phy_func reset_type; diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index c5fd3752272..e064aac06b9 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -865,3 +865,4 @@ EXPORT_SYMBOL_GPL(sas_change_queue_depth); EXPORT_SYMBOL_GPL(sas_change_queue_type); EXPORT_SYMBOL_GPL(sas_bios_param); EXPORT_SYMBOL_GPL(sas_task_abort); +EXPORT_SYMBOL_GPL(sas_phy_reset); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index a1fc20a47c5..29f6e1af1bf 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -597,6 +597,7 @@ struct sas_domain_function_template { extern int sas_register_ha(struct sas_ha_struct *); extern int sas_unregister_ha(struct sas_ha_struct *); +int sas_phy_reset(struct sas_phy *phy, int hard_reset); extern int sas_queuecommand(struct scsi_cmnd *, void (*scsi_done)(struct scsi_cmnd *)); extern int sas_target_alloc(struct scsi_target *); diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 53024377f3b..59633a82de4 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -73,6 +73,8 @@ struct sas_phy { /* for the list of phys belonging to a port */ struct list_head port_siblings; + + struct work_struct reset_work; }; #define dev_to_phy(d) \ -- cgit v1.2.3-70-g09d2 From 7b4feee973ca7be63345b92a987ef7ef879b179b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 14 Nov 2006 18:02:07 -0800 Subject: [PATCH] aic94xx: delete ascb timers when freeing queues When the aic94xx driver creates ascbs, each ascb is initialized with a timeout timer. If there are any ascbs left over when the driver is being torn down, these timers need to be deleted. In particular, we seem to hit this case when ascbs are issued yet never end up on the done list. Right now there's a sequencer bug that results in this happening every so often. CONTROL PHY commands are typically sent when things are really messed up with the sequencer; however, any other leftover ascb should produce loud warnings. Signed-off-by: Darrick J. Wong Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx_init.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/scsi/aic94xx') diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index 99743ca29ca..3a5bbba3976 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -724,6 +724,15 @@ static void asd_free_queues(struct asd_ha_struct *asd_ha) list_for_each_safe(pos, n, &pending) { struct asd_ascb *ascb = list_entry(pos, struct asd_ascb, list); + /* + * Delete unexpired ascb timers. This may happen if we issue + * a CONTROL PHY scb to an adapter and rmmod before the scb + * times out. Apparently we don't wait for the CONTROL PHY + * to complete, so it doesn't matter if we kill the timer. + */ + del_timer_sync(&ascb->timer); + WARN_ON(ascb->scb->header.opcode != CONTROL_PHY); + list_del_init(pos); ASD_DPRINTK("freeing from pending\n"); asd_ascb_free(ascb); -- cgit v1.2.3-70-g09d2 From e138a5d2356729b8752e88520cc1525fae9794ac Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 22 Nov 2006 11:54:15 -0600 Subject: [SCSI] aic94xx: fix pointer to integer conversion warning Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx_scb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/scsi/aic94xx') diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c index a014418d670..52c6ea4fbf7 100644 --- a/drivers/scsi/aic94xx/aic94xx_scb.c +++ b/drivers/scsi/aic94xx/aic94xx_scb.c @@ -446,7 +446,7 @@ static void escb_tasklet_complete(struct asd_ascb *ascb, continue; dev = task->dev; - x = (u16)dev->lldd_dev; + x = (unsigned long)dev->lldd_dev; if (x == conn_handle) { dev_phy = dev->port->phy; task_kill_later(a); -- cgit v1.2.3-70-g09d2