1 files changed, 68 insertions, 137 deletions
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index ad534216507..18c5d252301 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -241,11 +241,10 @@ static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
 
 		if (cmd_cancel || cmd_failed) {
 			SCSI_LOG_ERROR_RECOVERY(3,
-				printk("%s: %d:%d:%d:%d cmds failed: %d,"
-				       " cancel: %d\n",
-				       __FUNCTION__, shost->host_no,
-				       sdev->channel, sdev->id, sdev->lun,
-				       cmd_failed, cmd_cancel));
+				sdev_printk(KERN_INFO, sdev,
+					    "%s: cmds failed: %d, cancel: %d\n",
+					    __FUNCTION__, cmd_failed,
+					    cmd_cancel));
 			cmd_cancel = 0;
 			cmd_failed = 0;
 			++devices_failed;
@@ -418,43 +417,15 @@ static int scsi_eh_completed_normally(struct scsi_cmnd *scmd)
 }
 
 /**
- * scsi_eh_times_out - timeout function for error handling.
- * @scmd:	Cmd that is timing out.
- *
- * Notes:
- *    During error handling, the kernel thread will be sleeping waiting
- *    for some action to complete on the device.  our only job is to
- *    record that it timed out, and to wake up the thread.
- **/
-static void scsi_eh_times_out(struct scsi_cmnd *scmd)
-{
-	scmd->eh_eflags |= SCSI_EH_REC_TIMEOUT;
-	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__,
-					  scmd));
-
-	up(scmd->device->host->eh_action);
-}
-
-/**
  * scsi_eh_done - Completion function for error handling.
  * @scmd:	Cmd that is done.
  **/
 static void scsi_eh_done(struct scsi_cmnd *scmd)
 {
-	/*
-	 * if the timeout handler is already running, then just set the
-	 * flag which says we finished late, and return.  we have no
-	 * way of stopping the timeout handler from running, so we must
-	 * always defer to it.
-	 */
-	if (del_timer(&scmd->eh_timeout)) {
-		scmd->request->rq_status = RQ_SCSI_DONE;
-
-		SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n",
-					   __FUNCTION__, scmd, scmd->result));
-
-		up(scmd->device->host->eh_action);
-	}
+	SCSI_LOG_ERROR_RECOVERY(3,
+		printk("%s scmd: %p result: %x\n",
+			__FUNCTION__, scmd, scmd->result));
+	complete(scmd->device->host->eh_action);
 }
 
 /**
@@ -462,10 +433,6 @@ static void scsi_eh_done(struct scsi_cmnd *scmd)
  * @scmd:	SCSI Cmd to send.
  * @timeout:	Timeout for cmd.
  *
- * Notes:
- *    The initialization of the structures is quite a bit different in
- *    this case, and furthermore, there is a different completion handler
- *    vs scsi_dispatch_cmd.
  * Return value:
  *    SUCCESS or FAILED or NEEDS_RETRY
  **/
@@ -473,24 +440,16 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
 {
 	struct scsi_device *sdev = scmd->device;
 	struct Scsi_Host *shost = sdev->host;
-	DECLARE_MUTEX_LOCKED(sem);
+	DECLARE_COMPLETION(done);
+	unsigned long timeleft;
 	unsigned long flags;
-	int rtn = SUCCESS;
+	int rtn;
 
-	/*
-	 * we will use a queued command if possible, otherwise we will
-	 * emulate the queuing and calling of completion function ourselves.
-	 */
 	if (sdev->scsi_level <= SCSI_2)
 		scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) |
 			(sdev->lun << 5 & 0xe0);
 
-	scsi_add_timer(scmd, timeout, scsi_eh_times_out);
-
-	/*
-	 * set up the semaphore so we wait for the command to complete.
-	 */
-	shost->eh_action = &sem;
+	shost->eh_action = &done;
 	scmd->request->rq_status = RQ_SCSI_BUSY;
 
 	spin_lock_irqsave(shost->host_lock, flags);
@@ -498,47 +457,29 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
 	shost->hostt->queuecommand(scmd, scsi_eh_done);
 	spin_unlock_irqrestore(shost->host_lock, flags);
 
-	down(&sem);
-	scsi_log_completion(scmd, SUCCESS);
+	timeleft = wait_for_completion_timeout(&done, timeout);
 
+	scmd->request->rq_status = RQ_SCSI_DONE;
 	shost->eh_action = NULL;
 
-	/*
-	 * see if timeout.  if so, tell the host to forget about it.
-	 * in other words, we don't want a callback any more.
-	 */
-	if (scmd->eh_eflags & SCSI_EH_REC_TIMEOUT) {
-		scmd->eh_eflags &= ~SCSI_EH_REC_TIMEOUT;
-
-		/*
-		 * as far as the low level driver is
-		 * concerned, this command is still active, so
-		 * we must give the low level driver a chance
-		 * to abort it. (db) 
-		 *
-		 * FIXME(eric) - we are not tracking whether we could
-		 * abort a timed out command or not.  not sure how
-		 * we should treat them differently anyways.
-		 */
-		if (shost->hostt->eh_abort_handler)
-			shost->hostt->eh_abort_handler(scmd);
-			
-		scmd->request->rq_status = RQ_SCSI_DONE;
-		rtn = FAILED;
-	}
+	scsi_log_completion(scmd, SUCCESS);
 
-	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n",
-					  __FUNCTION__, scmd, rtn));
+	SCSI_LOG_ERROR_RECOVERY(3,
+		printk("%s: scmd: %p, timeleft: %ld\n",
+			__FUNCTION__, scmd, timeleft));
 
 	/*
-	 * now examine the actual status codes to see whether the command
-	 * actually did complete normally.
+	 * If there is time left scsi_eh_done got called, and we will
+	 * examine the actual status codes to see whether the command
+	 * actually did complete normally, else tell the host to forget
+	 * about this command.
 	 */
-	if (rtn == SUCCESS) {
+	if (timeleft) {
 		rtn = scsi_eh_completed_normally(scmd);
 		SCSI_LOG_ERROR_RECOVERY(3,
 			printk("%s: scsi_eh_completed_normally %x\n",
 			       __FUNCTION__, rtn));
+
 		switch (rtn) {
 		case SUCCESS:
 		case NEEDS_RETRY:
@@ -548,6 +489,15 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
 			rtn = FAILED;
 			break;
 		}
+	} else {
+		/*
+		 * FIXME(eric) - we are not tracking whether we could
+		 * abort a timed out command or not.  not sure how
+		 * we should treat them differently anyways.
+		 */
+		if (shost->hostt->eh_abort_handler)
+			shost->hostt->eh_abort_handler(scmd);
+		rtn = FAILED;
 	}
 
 	return rtn;
@@ -674,10 +624,9 @@ static int scsi_eh_get_sense(struct list_head *work_q,
 		    SCSI_SENSE_VALID(scmd))
 			continue;
 
-		SCSI_LOG_ERROR_RECOVERY(2, printk("%s: requesting sense"
-						  " for id: %d\n",
-						  current->comm,
-						  scmd->device->id));
+		SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,
+						  "%s: requesting sense\n",
+						  current->comm));
 		rtn = scsi_request_sense(scmd);
 		if (rtn != SUCCESS)
 			continue;
@@ -1035,7 +984,8 @@ static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
 		if (!scmd->device->host->hostt->skip_settle_delay)
 			ssleep(BUS_RESET_SETTLE_TIME);
 		spin_lock_irqsave(scmd->device->host->host_lock, flags);
-		scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
+		scsi_report_bus_reset(scmd->device->host,
+				      scmd_channel(scmd));
 		spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
 	}
 
@@ -1063,7 +1013,8 @@ static int scsi_try_host_reset(struct scsi_cmnd *scmd)
 		if (!scmd->device->host->hostt->skip_settle_delay)
 			ssleep(HOST_RESET_SETTLE_TIME);
 		spin_lock_irqsave(scmd->device->host->host_lock, flags);
-		scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
+		scsi_report_bus_reset(scmd->device->host,
+				      scmd_channel(scmd));
 		spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
 	}
 
@@ -1093,7 +1044,7 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,
 	for (channel = 0; channel <= shost->max_channel; channel++) {
 		chan_scmd = NULL;
 		list_for_each_entry(scmd, work_q, eh_entry) {
-			if (channel == scmd->device->channel) {
+			if (channel == scmd_channel(scmd)) {
 				chan_scmd = scmd;
 				break;
 				/*
@@ -1111,7 +1062,7 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost,
 		rtn = scsi_try_bus_reset(chan_scmd);
 		if (rtn == SUCCESS) {
 			list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
-				if (channel == scmd->device->channel)
+				if (channel == scmd_channel(scmd))
 					if (!scsi_device_online(scmd->device) ||
 					    !scsi_eh_tur(scmd))
 						scsi_eh_finish_cmd(scmd,
@@ -1174,13 +1125,9 @@ static void scsi_eh_offline_sdevs(struct list_head *work_q,
 	struct scsi_cmnd *scmd, *next;
 
 	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
-		printk(KERN_INFO "scsi: Device offlined - not"
-		       		" ready after error recovery: host"
-				" %d channel %d id %d lun %d\n",
-				scmd->device->host->host_no,
-				scmd->device->channel,
-				scmd->device->id,
-				scmd->device->lun);
+		sdev_printk(KERN_INFO, scmd->device,
+			    "scsi: Device offlined - not"
+			    " ready after error recovery\n");
 		scsi_device_set_state(scmd->device, SDEV_OFFLINE);
 		if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) {
 			/*
@@ -1342,10 +1289,8 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
 		return SUCCESS;
 
 	case RESERVATION_CONFLICT:
-		printk(KERN_INFO "scsi: reservation conflict: host"
-                                " %d channel %d id %d lun %d\n",
-		       scmd->device->host->host_no, scmd->device->channel,
-		       scmd->device->id, scmd->device->lun);
+		sdev_printk(KERN_INFO, scmd->device,
+			    "reservation conflict\n");
 		return SUCCESS; /* causes immediate i/o error */
 	default:
 		return FAILED;
@@ -1577,50 +1522,41 @@ static void scsi_unjam_host(struct Scsi_Host *shost)
 }
 
 /**
- * scsi_error_handler - Handle errors/timeouts of SCSI cmds.
+ * scsi_error_handler - SCSI error handler thread
  * @data:	Host for which we are running.
  *
  * Notes:
- *    This is always run in the context of a kernel thread.  The idea is
- *    that we start this thing up when the kernel starts up (one per host
- *    that we detect), and it immediately goes to sleep and waits for some
- *    event (i.e. failure).  When this takes place, we have the job of
- *    trying to unjam the bus and restarting things.
+ *    This is the main error handling loop.  This is run as a kernel thread
+ *    for every SCSI host and handles all error handling activity.
  **/
 int scsi_error_handler(void *data)
 {
-	struct Scsi_Host *shost = (struct Scsi_Host *) data;
-	int rtn;
+	struct Scsi_Host *shost = data;
 
 	current->flags |= PF_NOFREEZE;
 
-	
 	/*
-	 * Note - we always use TASK_INTERRUPTIBLE even if the module
-	 * was loaded as part of the kernel.  The reason is that
-	 * UNINTERRUPTIBLE would cause this thread to be counted in
-	 * the load average as a running process, and an interruptible
-	 * wait doesn't.
+	 * We use TASK_INTERRUPTIBLE so that the thread is not
+	 * counted against the load average as a running process.
+	 * We never actually get interrupted because kthread_run
+	 * disables singal delivery for the created thread.
 	 */
 	set_current_state(TASK_INTERRUPTIBLE);
 	while (!kthread_should_stop()) {
 		if (shost->host_failed == 0 ||
 		    shost->host_failed != shost->host_busy) {
-			SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
-							  " scsi_eh_%d"
-							  " sleeping\n",
-							  shost->host_no));
+			SCSI_LOG_ERROR_RECOVERY(1,
+				printk("Error handler scsi_eh_%d sleeping\n",
+					shost->host_no));
 			schedule();
 			set_current_state(TASK_INTERRUPTIBLE);
 			continue;
 		}
 
 		__set_current_state(TASK_RUNNING);
-		SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
-						  " scsi_eh_%d waking"
-						  " up\n",shost->host_no));
-
-		shost->eh_active = 1;
+		SCSI_LOG_ERROR_RECOVERY(1,
+			printk("Error handler scsi_eh_%d waking up\n",
+				shost->host_no));
 
 		/*
 		 * We have a host that is failing for some reason.  Figure out
@@ -1628,12 +1564,10 @@ int scsi_error_handler(void *data)
 		 * If we fail, we end up taking the thing offline.
 		 */
 		if (shost->hostt->eh_strategy_handler) 
-			rtn = shost->hostt->eh_strategy_handler(shost);
+			shost->hostt->eh_strategy_handler(shost);
 		else
 			scsi_unjam_host(shost);
 
-		shost->eh_active = 0;
-
 		/*
 		 * Note - if the above fails completely, the action is to take
 		 * individual devices offline and flush the queue of any
@@ -1644,13 +1578,10 @@ int scsi_error_handler(void *data)
 		scsi_restart_operations(shost);
 		set_current_state(TASK_INTERRUPTIBLE);
 	}
+	__set_current_state(TASK_RUNNING);
 
-	SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d"
-					  " exiting\n",shost->host_no));
-
-	/*
-	 * Make sure that nobody tries to wake us up again.
-	 */
+	SCSI_LOG_ERROR_RECOVERY(1,
+		printk("Error handler scsi_eh_%d exiting\n", shost->host_no));
 	shost->ehandler = NULL;
 	return 0;
 }
@@ -1681,7 +1612,7 @@ void scsi_report_bus_reset(struct Scsi_Host *shost, int channel)
 	struct scsi_device *sdev;
 
 	__shost_for_each_device(sdev, shost) {
-		if (channel == sdev->channel) {
+		if (channel == sdev_channel(sdev)) {
 			sdev->was_reset = 1;
 			sdev->expecting_cc_ua = 1;
 		}
@@ -1716,8 +1647,8 @@ void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target)
 	struct scsi_device *sdev;
 
 	__shost_for_each_device(sdev, shost) {
-		if (channel == sdev->channel &&
-		    target == sdev->id) {
+		if (channel == sdev_channel(sdev) &&
+		    target == sdev_id(sdev)) {
 			sdev->was_reset = 1;
 			sdev->expecting_cc_ua = 1;
 		}