diff options
author | Stefan Weinhuber <wein@de.ibm.com> | 2009-12-07 12:51:51 +0100 |
---|---|---|
committer | Martin Schwidefsky <sky@mschwide.boeblingen.de.ibm.com> | 2009-12-07 12:51:34 +0100 |
commit | eb6e199bef288611157b8198c25d12b32bf058d0 (patch) | |
tree | 80737a2703a9f4d09cee2410342aeccb281413ae /drivers/s390/block/dasd_3990_erp.c | |
parent | 626350b63ef2cd447023d3dc2a34eaa7ca01bfff (diff) |
[S390] dasd: improve error recovery for internal I/O
Most of the error conditions reported by a FICON storage server
indicate situations which can be recovered. Sometimes the host just
needs to retry an I/O request, but sometimes the recovery
is more complex and requires the device driver to wait, choose
a different path, etc.
The DASD device driver has a fully featured error recovery
for normal block layer I/O, but not for internal I/O request which
are for example used during the device bring up.
This can lead to situations where the IPL of a system fails because
DASD devices are not properly recognized.
This patch will extend the internal I/O handling to use the existing
error recovery procedures.
Signed-off-by: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'drivers/s390/block/dasd_3990_erp.c')
-rw-r--r-- | drivers/s390/block/dasd_3990_erp.c | 46 |
1 files changed, 33 insertions, 13 deletions
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index 316eb1256a9..44796ba4eb9 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -69,8 +69,7 @@ dasd_3990_erp_cleanup(struct dasd_ccw_req * erp, char final_status) * processing until the started timer has expired or an related * interrupt was received. */ -static void -dasd_3990_erp_block_queue(struct dasd_ccw_req * erp, int expires) +static void dasd_3990_erp_block_queue(struct dasd_ccw_req *erp, int expires) { struct dasd_device *device = erp->startdev; @@ -80,10 +79,13 @@ dasd_3990_erp_block_queue(struct dasd_ccw_req * erp, int expires) "blocking request queue for %is", expires/HZ); spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - device->stopped |= DASD_STOPPED_PENDING; + dasd_device_set_stop_bits(device, DASD_STOPPED_PENDING); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); erp->status = DASD_CQR_FILLED; - dasd_block_set_timer(device->block, expires); + if (erp->block) + dasd_block_set_timer(erp->block, expires); + else + dasd_device_set_timer(device, expires); } /* @@ -242,9 +244,13 @@ dasd_3990_erp_DCTL(struct dasd_ccw_req * erp, char modifier) * DESCRIPTION * Setup ERP to do the ERP action 1 (see Reference manual). * Repeat the operation on a different channel path. - * If all alternate paths have been tried, the request is posted with a - * permanent error. - * Note: duplex handling is not implemented (yet). + * As deviation from the recommended recovery action, we reset the path mask + * after we have tried each path and go through all paths a second time. + * This will cover situations where only one path at a time is actually down, + * but all paths fail and recover just with the same sequence and timing as + * we try to use them (flapping links). + * If all alternate paths have been tried twice, the request is posted with + * a permanent error. * * PARAMETER * erp pointer to the current ERP @@ -253,17 +259,25 @@ dasd_3990_erp_DCTL(struct dasd_ccw_req * erp, char modifier) * erp pointer to the ERP * */ -static struct dasd_ccw_req * -dasd_3990_erp_action_1(struct dasd_ccw_req * erp) +static struct dasd_ccw_req *dasd_3990_erp_action_1_sec(struct dasd_ccw_req *erp) { + erp->function = dasd_3990_erp_action_1_sec; + dasd_3990_erp_alternate_path(erp); + return erp; +} +static struct dasd_ccw_req *dasd_3990_erp_action_1(struct dasd_ccw_req *erp) +{ erp->function = dasd_3990_erp_action_1; - dasd_3990_erp_alternate_path(erp); - + if (erp->status == DASD_CQR_FAILED) { + erp->status = DASD_CQR_FILLED; + erp->retries = 10; + erp->lpm = LPM_ANYPATH; + erp->function = dasd_3990_erp_action_1_sec; + } return erp; - -} /* end dasd_3990_erp_action_1 */ +} /* end dasd_3990_erp_action_1(b) */ /* * DASD_3990_ERP_ACTION_4 @@ -2294,6 +2308,7 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req *cqr) return cqr; } + ccw = cqr->cpaddr; if (cqr->cpmode == 1) { /* make a shallow copy of the original tcw but set new tsb */ erp->cpmode = 1; @@ -2302,6 +2317,9 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req *cqr) tsb = (struct tsb *) &tcw[1]; *tcw = *((struct tcw *)cqr->cpaddr); tcw->tsb = (long)tsb; + } else if (ccw->cmd_code == DASD_ECKD_CCW_PSF) { + /* PSF cannot be chained from NOOP/TIC */ + erp->cpaddr = cqr->cpaddr; } else { /* initialize request with default TIC to current ERP/CQR */ ccw = erp->cpaddr; @@ -2486,6 +2504,8 @@ dasd_3990_erp_further_erp(struct dasd_ccw_req *erp) erp = dasd_3990_erp_action_1(erp); + } else if (erp->function == dasd_3990_erp_action_1_sec) { + erp = dasd_3990_erp_action_1_sec(erp); } else if (erp->function == dasd_3990_erp_action_5) { /* retries have not been successful */ |