summaryrefslogtreecommitdiffstats
path: root/drivers/scsi/scsi_error.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/scsi/scsi_error.c')
-rw-r--r--drivers/scsi/scsi_error.c231
1 files changed, 104 insertions, 127 deletions
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 1de30eb83bb..abea2cf05c2 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -3,14 +3,14 @@
*
* SCSI error/timeout handling
* Initial versions: Eric Youngdale. Based upon conversations with
- * Leonard Zubkoff and David Miller at Linux Expo,
+ * Leonard Zubkoff and David Miller at Linux Expo,
* ideas originating from all over the place.
*
* Restructured scsi_unjam_host and associated functions.
* September 04, 2002 Mike Anderson (andmike@us.ibm.com)
*
* Forward port of Russell King's (rmk@arm.linux.org.uk) changes and
- * minor cleanups.
+ * minor cleanups.
* September 30, 2002 Mike Anderson (andmike@us.ibm.com)
*/
@@ -129,14 +129,15 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)
{
struct scsi_cmnd *scmd = req->special;
enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED;
+ struct Scsi_Host *host = scmd->device->host;
trace_scsi_dispatch_cmd_timeout(scmd);
scsi_log_completion(scmd, TIMEOUT_ERROR);
- if (scmd->device->host->transportt->eh_timed_out)
- rtn = scmd->device->host->transportt->eh_timed_out(scmd);
- else if (scmd->device->host->hostt->eh_timed_out)
- rtn = scmd->device->host->hostt->eh_timed_out(scmd);
+ if (host->transportt->eh_timed_out)
+ rtn = host->transportt->eh_timed_out(scmd);
+ else if (host->hostt->eh_timed_out)
+ rtn = host->hostt->eh_timed_out(scmd);
if (unlikely(rtn == BLK_EH_NOT_HANDLED &&
!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
@@ -195,7 +196,7 @@ static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
++total_failures;
if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD)
++cmd_cancel;
- else
+ else
++cmd_failed;
}
}
@@ -214,7 +215,7 @@ static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d"
" devices require eh work\n",
- total_failures, devices_failed));
+ total_failures, devices_failed));
}
#endif
@@ -223,7 +224,7 @@ static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
* @scmd: Cmd to have sense checked.
*
* Return value:
- * SUCCESS or FAILED or NEEDS_RETRY
+ * SUCCESS or FAILED or NEEDS_RETRY or TARGET_ERROR
*
* Notes:
* When a deferred error is detected the current command has
@@ -294,7 +295,7 @@ static int scsi_check_sense(struct scsi_cmnd *scmd)
return NEEDS_RETRY;
}
/*
- * if the device is in the process of becoming ready, we
+ * if the device is in the process of becoming ready, we
* should retry.
*/
if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01))
@@ -320,31 +321,31 @@ static int scsi_check_sense(struct scsi_cmnd *scmd)
"changed. The Linux SCSI layer does not "
"automatically adjust these parameters.\n");
- if (scmd->request->cmd_flags & REQ_HARDBARRIER)
- /*
- * barrier requests should always retry on UA
- * otherwise block will get a spurious error
- */
- return NEEDS_RETRY;
- else
- /*
- * for normal (non barrier) commands, pass the
- * UA upwards for a determination in the
- * completion functions
- */
- return SUCCESS;
+ if (sshdr.asc == 0x38 && sshdr.ascq == 0x07)
+ scmd_printk(KERN_WARNING, scmd,
+ "Warning! Received an indication that the "
+ "LUN reached a thin provisioning soft "
+ "threshold.\n");
+
+ /*
+ * Pass the UA upwards for a determination in the completion
+ * functions.
+ */
+ return SUCCESS;
- /* these three are not supported */
+ /* these are not supported */
case COPY_ABORTED:
case VOLUME_OVERFLOW:
case MISCOMPARE:
- return SUCCESS;
+ case BLANK_CHECK:
+ case DATA_PROTECT:
+ return TARGET_ERROR;
case MEDIUM_ERROR:
if (sshdr.asc == 0x11 || /* UNRECOVERED READ ERR */
sshdr.asc == 0x13 || /* AMNF DATA FIELD */
sshdr.asc == 0x14) { /* RECORD NOT FOUND */
- return SUCCESS;
+ return TARGET_ERROR;
}
return NEEDS_RETRY;
@@ -352,11 +353,9 @@ static int scsi_check_sense(struct scsi_cmnd *scmd)
if (scmd->device->retry_hwerror)
return ADD_TO_MLQUEUE;
else
- return SUCCESS;
+ return TARGET_ERROR;
case ILLEGAL_REQUEST:
- case BLANK_CHECK:
- case DATA_PROTECT:
default:
return SUCCESS;
}
@@ -496,7 +495,7 @@ static int scsi_eh_completed_normally(struct scsi_cmnd *scmd)
*/
static void scsi_eh_done(struct scsi_cmnd *scmd)
{
- struct completion *eh_action;
+ struct completion *eh_action;
SCSI_LOG_ERROR_RECOVERY(3,
printk("%s scmd: %p result: %x\n",
@@ -515,22 +514,23 @@ static int scsi_try_host_reset(struct scsi_cmnd *scmd)
{
unsigned long flags;
int rtn;
+ struct Scsi_Host *host = scmd->device->host;
+ struct scsi_host_template *hostt = host->hostt;
SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n",
__func__));
- if (!scmd->device->host->hostt->eh_host_reset_handler)
+ if (!hostt->eh_host_reset_handler)
return FAILED;
- rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd);
+ rtn = hostt->eh_host_reset_handler(scmd);
if (rtn == SUCCESS) {
- if (!scmd->device->host->hostt->skip_settle_delay)
+ if (!hostt->skip_settle_delay)
ssleep(HOST_RESET_SETTLE_TIME);
- spin_lock_irqsave(scmd->device->host->host_lock, flags);
- scsi_report_bus_reset(scmd->device->host,
- scmd_channel(scmd));
- spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
+ spin_lock_irqsave(host->host_lock, flags);
+ scsi_report_bus_reset(host, scmd_channel(scmd));
+ spin_unlock_irqrestore(host->host_lock, flags);
}
return rtn;
@@ -544,22 +544,23 @@ static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
{
unsigned long flags;
int rtn;
+ struct Scsi_Host *host = scmd->device->host;
+ struct scsi_host_template *hostt = host->hostt;
SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n",
__func__));
- if (!scmd->device->host->hostt->eh_bus_reset_handler)
+ if (!hostt->eh_bus_reset_handler)
return FAILED;
- rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd);
+ rtn = hostt->eh_bus_reset_handler(scmd);
if (rtn == SUCCESS) {
- if (!scmd->device->host->hostt->skip_settle_delay)
+ if (!hostt->skip_settle_delay)
ssleep(BUS_RESET_SETTLE_TIME);
- spin_lock_irqsave(scmd->device->host->host_lock, flags);
- scsi_report_bus_reset(scmd->device->host,
- scmd_channel(scmd));
- spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
+ spin_lock_irqsave(host->host_lock, flags);
+ scsi_report_bus_reset(host, scmd_channel(scmd));
+ spin_unlock_irqrestore(host->host_lock, flags);
}
return rtn;
@@ -585,16 +586,18 @@ static int scsi_try_target_reset(struct scsi_cmnd *scmd)
{
unsigned long flags;
int rtn;
+ struct Scsi_Host *host = scmd->device->host;
+ struct scsi_host_template *hostt = host->hostt;
- if (!scmd->device->host->hostt->eh_target_reset_handler)
+ if (!hostt->eh_target_reset_handler)
return FAILED;
- rtn = scmd->device->host->hostt->eh_target_reset_handler(scmd);
+ rtn = hostt->eh_target_reset_handler(scmd);
if (rtn == SUCCESS) {
- spin_lock_irqsave(scmd->device->host->host_lock, flags);
+ spin_lock_irqsave(host->host_lock, flags);
__starget_for_each_device(scsi_target(scmd->device), NULL,
__scsi_report_device_reset);
- spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
+ spin_unlock_irqrestore(host->host_lock, flags);
}
return rtn;
@@ -613,49 +616,28 @@ static int scsi_try_target_reset(struct scsi_cmnd *scmd)
static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd)
{
int rtn;
+ struct scsi_host_template *hostt = scmd->device->host->hostt;
- if (!scmd->device->host->hostt->eh_device_reset_handler)
+ if (!hostt->eh_device_reset_handler)
return FAILED;
- rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd);
+ rtn = hostt->eh_device_reset_handler(scmd);
if (rtn == SUCCESS)
__scsi_report_device_reset(scmd->device, NULL);
return rtn;
}
-static int __scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
+static int scsi_try_to_abort_cmd(struct scsi_host_template *hostt, struct scsi_cmnd *scmd)
{
- if (!scmd->device->host->hostt->eh_abort_handler)
+ if (!hostt->eh_abort_handler)
return FAILED;
- return scmd->device->host->hostt->eh_abort_handler(scmd);
-}
-
-/**
- * scsi_try_to_abort_cmd - Ask host to abort a running command.
- * @scmd: SCSI cmd to abort from Lower Level.
- *
- * Notes:
- * This function will not return until the user's completion function
- * has been called. there is no timeout on this operation. if the
- * author of the low-level driver wishes this operation to be timed,
- * they can provide this facility themselves. helper functions in
- * scsi_error.c can be supplied to make this easier to do.
- */
-static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
-{
- /*
- * scsi_done was called just after the command timed out and before
- * we had a chance to process it. (db)
- */
- if (scmd->serial_number == 0)
- return SUCCESS;
- return __scsi_try_to_abort_cmd(scmd);
+ return hostt->eh_abort_handler(scmd);
}
static void scsi_abort_eh_cmnd(struct scsi_cmnd *scmd)
{
- if (__scsi_try_to_abort_cmd(scmd) != SUCCESS)
+ if (scsi_try_to_abort_cmd(scmd->device->host->hostt, scmd) != SUCCESS)
if (scsi_try_bus_device_reset(scmd) != SUCCESS)
if (scsi_try_target_reset(scmd) != SUCCESS)
if (scsi_try_bus_reset(scmd) != SUCCESS)
@@ -781,17 +763,15 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
struct Scsi_Host *shost = sdev->host;
DECLARE_COMPLETION_ONSTACK(done);
unsigned long timeleft;
- unsigned long flags;
struct scsi_eh_save ses;
int rtn;
scsi_eh_prep_cmnd(scmd, &ses, cmnd, cmnd_size, sense_bytes);
shost->eh_action = &done;
- spin_lock_irqsave(shost->host_lock, flags);
scsi_log_send(scmd);
- shost->hostt->queuecommand(scmd, scsi_eh_done);
- spin_unlock_irqrestore(shost->host_lock, flags);
+ scmd->scsi_done = scsi_eh_done;
+ shost->hostt->queuecommand(shost, scmd);
timeleft = wait_for_completion_timeout(&done, timeout);
@@ -819,6 +799,7 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
case SUCCESS:
case NEEDS_RETRY:
case FAILED:
+ case TARGET_ERROR:
break;
case ADD_TO_MLQUEUE:
rtn = NEEDS_RETRY;
@@ -877,7 +858,7 @@ EXPORT_SYMBOL(scsi_eh_finish_cmd);
*
* Description:
* See if we need to request sense information. if so, then get it
- * now, so we have a better idea of what to do.
+ * now, so we have a better idea of what to do.
*
* Notes:
* This has the unfortunate side effect that if a shost adapter does
@@ -989,7 +970,7 @@ static int scsi_eh_abort_cmds(struct list_head *work_q,
SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
"0x%p\n", current->comm,
scmd));
- rtn = scsi_try_to_abort_cmd(scmd);
+ rtn = scsi_try_to_abort_cmd(scmd->device->host->hostt, scmd);
if (rtn == SUCCESS || rtn == FAST_IO_FAIL) {
scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD;
if (!scsi_device_online(scmd->device) ||
@@ -997,7 +978,6 @@ static int scsi_eh_abort_cmds(struct list_head *work_q,
!scsi_eh_tur(scmd)) {
scsi_eh_finish_cmd(scmd, done_q);
}
-
} else
SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting"
" cmd failed:"
@@ -1041,7 +1021,7 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd)
*
* Notes:
* If commands are failing due to not ready, initializing command required,
- * try revalidating the device, which will end up sending a start unit.
+ * try revalidating the device, which will end up sending a start unit.
*/
static int scsi_eh_stu(struct Scsi_Host *shost,
struct list_head *work_q,
@@ -1095,7 +1075,7 @@ static int scsi_eh_stu(struct Scsi_Host *shost,
* Try a bus device reset. Still, look to see whether we have multiple
* devices that are jammed or not - if we have multiple devices, it
* makes no sense to try bus_device_reset - we really would need to try
- * a bus_reset instead.
+ * a bus_reset instead.
*/
static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
struct list_head *work_q,
@@ -1156,57 +1136,46 @@ static int scsi_eh_target_reset(struct Scsi_Host *shost,
struct list_head *work_q,
struct list_head *done_q)
{
- struct scsi_cmnd *scmd, *tgtr_scmd, *next;
- unsigned int id = 0;
- int rtn;
+ LIST_HEAD(tmp_list);
- do {
- tgtr_scmd = NULL;
- list_for_each_entry(scmd, work_q, eh_entry) {
- if (id == scmd_id(scmd)) {
- tgtr_scmd = scmd;
- break;
- }
- }
- if (!tgtr_scmd) {
- /* not one exactly equal; find the next highest */
- list_for_each_entry(scmd, work_q, eh_entry) {
- if (scmd_id(scmd) > id &&
- (!tgtr_scmd ||
- scmd_id(tgtr_scmd) > scmd_id(scmd)))
- tgtr_scmd = scmd;
- }
- }
- if (!tgtr_scmd)
- /* no more commands, that's it */
- break;
+ list_splice_init(work_q, &tmp_list);
+
+ while (!list_empty(&tmp_list)) {
+ struct scsi_cmnd *next, *scmd;
+ int rtn;
+ unsigned int id;
+
+ scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry);
+ id = scmd_id(scmd);
SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending target reset "
"to target %d\n",
current->comm, id));
- rtn = scsi_try_target_reset(tgtr_scmd);
- if (rtn == SUCCESS || rtn == FAST_IO_FAIL) {
- list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
- if (id == scmd_id(scmd))
- if (!scsi_device_online(scmd->device) ||
- rtn == FAST_IO_FAIL ||
- !scsi_eh_tur(tgtr_scmd))
- scsi_eh_finish_cmd(scmd,
- done_q);
- }
- } else
+ rtn = scsi_try_target_reset(scmd);
+ if (rtn != SUCCESS && rtn != FAST_IO_FAIL)
SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Target reset"
" failed target: "
"%d\n",
current->comm, id));
- id++;
- } while(id != 0);
+ list_for_each_entry_safe(scmd, next, &tmp_list, eh_entry) {
+ if (scmd_id(scmd) != id)
+ continue;
+
+ if ((rtn == SUCCESS || rtn == FAST_IO_FAIL)
+ && (!scsi_device_online(scmd->device) ||
+ rtn == FAST_IO_FAIL || !scsi_eh_tur(scmd)))
+ scsi_eh_finish_cmd(scmd, done_q);
+ else
+ /* push back on work queue for further processing */
+ list_move(&scmd->eh_entry, work_q);
+ }
+ }
return list_empty(work_q);
}
/**
- * scsi_eh_bus_reset - send a bus reset
+ * scsi_eh_bus_reset - send a bus reset
* @shost: &scsi host being recovered.
* @work_q: &list_head for pending commands.
* @done_q: &list_head for processed commands.
@@ -1223,7 +1192,7 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,
* we really want to loop over the various channels, and do this on
* a channel by channel basis. we should also check to see if any
* of the failed commands are on soft_reset devices, and if so, skip
- * the reset.
+ * the reset.
*/
for (channel = 0; channel <= shost->max_channel; channel++) {
@@ -1265,7 +1234,7 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,
}
/**
- * scsi_eh_host_reset - send a host reset
+ * scsi_eh_host_reset - send a host reset
* @work_q: list_head for processed commands.
* @done_q: list_head for processed commands.
*/
@@ -1418,7 +1387,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
return SUCCESS;
/*
* when the low level driver returns did_soft_error,
- * it is responsible for keeping an internal retry counter
+ * it is responsible for keeping an internal retry counter
* in order to avoid endless loops (db)
*
* actually this is a bug in this function here. we should
@@ -1456,7 +1425,6 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
*/
break;
/* fallthrough */
-
case DID_BUS_BUSY:
case DID_PARITY:
goto maybe_retry;
@@ -1512,6 +1480,14 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
rtn = scsi_check_sense(scmd);
if (rtn == NEEDS_RETRY)
goto maybe_retry;
+ else if (rtn == TARGET_ERROR) {
+ /*
+ * Need to modify host byte to signal a
+ * permanent target failure
+ */
+ scmd->result |= (DID_TARGET_FAILURE << 16);
+ rtn = SUCCESS;
+ }
/* if rtn == FAILED, we have no sense information;
* returning FAILED will wake the error handler thread
* to collect the sense and redo the decide
@@ -1529,6 +1505,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
case RESERVATION_CONFLICT:
sdev_printk(KERN_INFO, scmd->device,
"reservation conflict\n");
+ scmd->result |= (DID_NEXUS_FAILURE << 16);
return SUCCESS; /* causes immediate i/o error */
default:
return FAILED;
@@ -2015,7 +1992,7 @@ int scsi_normalize_sense(const u8 *sense_buffer, int sb_len,
if (sb_len > 7)
sshdr->additional_length = sense_buffer[7];
} else {
- /*
+ /*
* fixed format
*/
if (sb_len > 2)