summaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd/drbd_worker.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/drbd/drbd_worker.c')
-rw-r--r--drivers/block/drbd/drbd_worker.c37
1 files changed, 37 insertions, 0 deletions
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 859afdfe5a0..eeda8b8e9d8 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -221,6 +221,43 @@ void drbd_request_endio(struct bio *bio, int error)
error = -EIO;
}
+
+ /* If this request was aborted locally before,
+ * but now was completed "successfully",
+ * chances are that this caused arbitrary data corruption.
+ *
+ * "aborting" requests, or force-detaching the disk, is intended for
+ * completely blocked/hung local backing devices which no longer
+ * complete requests at all, not even with error completions. In this
+ * situation, usually a hard-reset and failover is the only way out.
+ *
+ * By "aborting", basically faking a local error-completion,
+ * we allow for a more graceful switchover by cleanly migrating services.
+ * Still the affected node has to be rebooted "soon".
+ *
+ * By completing these requests, we allow the upper layers to re-use
+ * the associated data pages.
+ *
+ * If later the local backing device "recovers", and now DMAs some data
+ * from disk into the original request pages, in the best case it will
+ * just put random data into unused pages; but typically it will corrupt
+ * meanwhile completely unrelated data, causing all sorts of damage.
+ *
+ * Which means delayed successful completion,
+ * especially for READ requests,
+ * is a reason to panic().
+ *
+ * We assume that a delayed *error* completion is OK,
+ * though we still will complain noisily about it.
+ */
+ if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_emerg(DEV, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
+
+ if (!error)
+ panic("possible random memory corruption caused by delayed completion of aborted local request\n");
+ }
+
/* to avoid recursion in __req_mod */
if (unlikely(error)) {
what = (bio_data_dir(bio) == WRITE)