diff options
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 39 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 20 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 9 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 4 |
5 files changed, 63 insertions, 11 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index eeab868f056..32a9ab67a5f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -682,7 +682,8 @@ enum { once no more io in flight, start bitmap io */ BITMAP_IO_QUEUED, /* Started bitmap IO */ GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ - WAS_IO_ERROR, /* Local disk failed returned IO error */ + WAS_IO_ERROR, /* Local disk failed, returned IO error */ + WAS_READ_ERROR, /* Local disk READ failed (set additionally to the above) */ FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ RESIZE_PENDING, /* Size change detected locally, waiting for the response from @@ -1142,6 +1143,9 @@ extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why, enum bm_flag flags); +extern int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, + int (*io_fn)(struct drbd_conf *), + char *why, enum bm_flag flags); extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); extern void drbd_go_diskless(struct drbd_conf *mdev); @@ -1661,14 +1665,15 @@ static inline union drbd_state drbd_read_state(struct drbd_conf *mdev) } enum drbd_force_detach_flags { - DRBD_IO_ERROR, + DRBD_READ_ERROR, + DRBD_WRITE_ERROR, DRBD_META_IO_ERROR, DRBD_FORCE_DETACH, }; #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, - enum drbd_force_detach_flags forcedetach, + enum drbd_force_detach_flags df, const char *where) { enum drbd_io_error_p ep; @@ -1678,18 +1683,40 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, rcu_read_unlock(); switch (ep) { case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */ - if (forcedetach == DRBD_IO_ERROR) { + if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Local IO failed in %s.\n", where); if (mdev->state.disk > D_INCONSISTENT) _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL); break; } - /* NOTE fall through to detach case if forcedetach set */ + /* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */ case EP_DETACH: case EP_CALL_HELPER: + /* Remember whether we saw a READ or WRITE error. + * + * Recovery of the affected area for WRITE failure is covered + * by the activity log. + * READ errors may fall outside that area though. Certain READ + * errors can be "healed" by writing good data to the affected + * blocks, which triggers block re-allocation in lower layers. + * + * If we can not write the bitmap after a READ error, + * we may need to trigger a full sync (see w_go_diskless()). + * + * Force-detach is not really an IO error, but rather a + * desperate measure to try to deal with a completely + * unresponsive lower level IO stack. + * Still it should be treated as a WRITE error. + * + * Meta IO error is always WRITE error: + * we read meta data only once during attach, + * which will fail in case of errors. + */ set_bit(WAS_IO_ERROR, &mdev->flags); - if (forcedetach == DRBD_FORCE_DETACH) + if (df == DRBD_READ_ERROR) + set_bit(WAS_READ_ERROR, &mdev->flags); + if (df == DRBD_FORCE_DETACH) set_bit(FORCE_DETACH, &mdev->flags); if (mdev->state.disk > D_FAILED) { _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5e5a6abb281..0f73e157dee 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3226,6 +3226,26 @@ static int w_go_diskless(struct drbd_work *w, int unused) * inc/dec it frequently. Once we are D_DISKLESS, no one will touch * the protected members anymore, though, so once put_ldev reaches zero * again, it will be safe to free them. */ + + /* Try to write changed bitmap pages, read errors may have just + * set some bits outside the area covered by the activity log. + * + * If we have an IO error during the bitmap writeout, + * we will want a full sync next time, just in case. + * (Do we want a specific meta data flag for this?) + * + * If that does not make it to stable storage either, + * we cannot do anything about that anymore. */ + if (mdev->bitmap) { + if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write, + "detach", BM_LOCKED_MASK)) { + if (test_bit(WAS_READ_ERROR, &mdev->flags)) { + drbd_md_set_flag(mdev, MDF_FULL_SYNC); + drbd_md_sync(mdev); + } + } + } + drbd_force_state(mdev, NS(disk, D_DISKLESS)); return 0; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 476491ffdab..52258867222 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1294,6 +1294,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) /* make sure there is no leftover from previous force-detach attempts */ clear_bit(FORCE_DETACH, &mdev->flags); + clear_bit(WAS_IO_ERROR, &mdev->flags); + clear_bit(WAS_READ_ERROR, &mdev->flags); /* and no leftover from previously aborted resync or verify, either */ mdev->rs_total = 0; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e307890e6af..97a9e69dd23 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -492,11 +492,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED); break; + case WRITE_COMPLETED_WITH_ERROR: + __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR); + mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); + break; + case READ_COMPLETED_WITH_ERROR: drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); - /* fall through. */ - case WRITE_COMPLETED_WITH_ERROR: - __drbd_chk_io_error(mdev, DRBD_IO_ERROR); + __drbd_chk_io_error(mdev, DRBD_READ_ERROR); /* fall through. */ case READ_AHEAD_COMPLETED_WITH_ERROR: /* it is legal to fail READA, no __drbd_chk_io_error in that case. */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d348260301f..64a7305c678 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -106,7 +106,7 @@ void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(lo if (list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) - __drbd_chk_io_error(mdev, DRBD_IO_ERROR); + __drbd_chk_io_error(mdev, DRBD_READ_ERROR); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w); @@ -147,7 +147,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee); if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) - __drbd_chk_io_error(mdev, DRBD_IO_ERROR); + __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (block_id == ID_SYNCER) |