From 1ed25b269e3dd5ecc64f17beef9ea21745c39ca6 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 8 Jun 2012 14:09:54 +0200 Subject: drbd: fix list corruption by failing but already aborted reads If a read is aborted due to force-detach of a supposedly unresponsive local backing device, and retried on the peer, it can happen that the local request later still completes (hopefully with an error). As it may already have been completed to upper layers meanwhile, it must not be retried again now. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 9c5c84946b0..773f4e2d3c1 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -472,12 +472,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - D_ASSERT(!(req->rq_state & RQ_NET_MASK)); + if (req->rq_state & RQ_LOCAL_ABORTED) { + _req_may_be_done(req, m); + break; + } __drbd_chk_io_error(mdev, false); goto_queue_for_net_read: + D_ASSERT(!(req->rq_state & RQ_NET_MASK)); + /* no point in retrying if there is no good remote data, * or we have no connection. */ if (mdev->state.pdsk != D_UP_TO_DATE) { -- cgit v1.2.3-70-g09d2 From 0d5934e3c258fc5decc4103600c597086fd95a52 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 8 Jun 2012 14:17:36 +0200 Subject: drbd: fix null pointer dereference with on-congestion policy when diskless We must not look at mdev->actlog, unless we have a get_ldev() reference. It also does not make much sense to try to disconnect or pull-ahead of the peer, if we don't have good local data. Only even consider congestion policies, if our local disk is D_UP_TO_DATE. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 59 ++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 23 deletions(-) (limited to 'drivers/block/drbd/drbd_req.c') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 773f4e2d3c1..8e93a6ac9bb 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -770,6 +770,40 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); } +static void maybe_pull_ahead(struct drbd_conf *mdev) +{ + int congested = 0; + + /* If I don't even have good local storage, we can not reasonably try + * to pull ahead of the peer. We also need the local reference to make + * sure mdev->act_log is there. + * Note: caller has to make sure that net_conf is there. + */ + if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) + return; + + if (mdev->net_conf->cong_fill && + atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) { + dev_info(DEV, "Congestion-fill threshold reached\n"); + congested = 1; + } + + if (mdev->act_log->used >= mdev->net_conf->cong_extents) { + dev_info(DEV, "Congestion-extents threshold reached\n"); + congested = 1; + } + + if (congested) { + queue_barrier(mdev); /* last barrier, after mirrored writes */ + + if (mdev->net_conf->on_congestion == OC_PULL_AHEAD) + _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); + else /*mdev->net_conf->on_congestion == OC_DISCONNECT */ + _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); + } + put_ldev(mdev); +} + static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) { const int rw = bio_rw(bio); @@ -977,29 +1011,8 @@ allocate_barrier: _req_mod(req, queue_for_send_oos); if (remote && - mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { - int congested = 0; - - if (mdev->net_conf->cong_fill && - atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) { - dev_info(DEV, "Congestion-fill threshold reached\n"); - congested = 1; - } - - if (mdev->act_log->used >= mdev->net_conf->cong_extents) { - dev_info(DEV, "Congestion-extents threshold reached\n"); - congested = 1; - } - - if (congested) { - queue_barrier(mdev); /* last barrier, after mirrored writes */ - - if (mdev->net_conf->on_congestion == OC_PULL_AHEAD) - _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); - else /*mdev->net_conf->on_congestion == OC_DISCONNECT */ - _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); - } - } + mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) + maybe_pull_ahead(mdev); spin_unlock_irq(&mdev->req_lock); kfree(b); /* if someone else has beaten us to it... */ -- cgit v1.2.3-70-g09d2