summaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd/drbd_req.c
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2011-11-24 10:36:25 +0100
committerPhilipp Reisner <philipp.reisner@linbit.com>2012-11-08 16:58:21 +0100
commit2312f0b3c5ab794fbac9e9bebe90c784c9d449c5 (patch)
tree0b4b74af502169d61e026855593b828fa202665a /drivers/block/drbd/drbd_req.c
parentf9916d61a40e7ad43c2a20444894f85c45512f91 (diff)
drbd: fix potential deadlock during "restart" of conflicting writes
w_restart_write(), run from worker context, calls __drbd_make_request() and further drbd_al_begin_io(, delegate=true), which then potentially deadlocks. The previous patch moved a BUG_ON to expose such call paths, which would now be triggered. Also, if we call __drbd_make_request() from resource worker context, like w_restart_write() did, and that should block for whatever reason (!drbd_state_is_stable(), resource suspended, ...), we potentially deadlock the whole resource, as the worker is needed for state changes and other things. Create a dedicated retry workqueue for this instead. Also make sure that inc_ap_bio()/dec_ap_bio() are properly paired, even if do_retry() needs to retry itself, in case __drbd_make_request() returns != 0. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_req.c')
-rw-r--r--drivers/block/drbd/drbd_req.c17
1 files changed, 14 insertions, 3 deletions
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index c3f99bde0e1..5f4436c3abb 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -104,7 +104,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
* and never sent), it should still be "empty" as
* initialized in drbd_req_new(), so we can list_del() it
* here unconditionally */
- list_del(&req->tl_requests);
+ list_del_init(&req->tl_requests);
/* if it was a write, we may have to set the corresponding
* bit(s) out-of-sync first. If it had a local part, we need to
@@ -143,7 +143,10 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
}
}
- drbd_req_free(req);
+ if (s & RQ_POSTPONED)
+ drbd_restart_write(req);
+ else
+ drbd_req_free(req);
}
static void queue_barrier(struct drbd_conf *mdev)
@@ -289,8 +292,16 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
if (!(s & RQ_POSTPONED)) {
m->error = ok ? 0 : (error ?: -EIO);
m->bio = req->master_bio;
+ req->master_bio = NULL;
+ } else {
+ /* Assert that this will be _req_is_done()
+ * with this very invokation. */
+ /* FIXME:
+ * what about (RQ_LOCAL_PENDING | RQ_LOCAL_ABORTED)?
+ */
+ D_ASSERT(!(s & RQ_LOCAL_PENDING));
+ D_ASSERT(s & RQ_NET_DONE);
}
- req->master_bio = NULL;
}
if (s & RQ_LOCAL_PENDING)