From cf14c2e987ba0a09a7b09be2ecd55af0bc9c17b4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 2 Feb 2010 21:03:50 +0100 Subject: drbd: --dry-run option for drbdsetup net ( drbdadm -- --dry-run connect ) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d065c646b35..8bcde4a9632 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2538,6 +2538,16 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } } + if (mdev->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) { + if (hg == 0) + dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); + else + dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.", + drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET), + abs(hg) >= 2 ? "full" : "bit-map based"); + return C_MASK; + } + if (abs(hg) >= 2) { dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) @@ -2585,7 +2595,7 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) struct p_protocol *p = (struct p_protocol *)h; int header_size, data_size; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; - int p_want_lose, p_two_primaries; + int p_want_lose, p_two_primaries, cf; char p_integrity_alg[SHARED_SECRET_MAX] = ""; header_size = sizeof(*p) - sizeof(*h); @@ -2598,8 +2608,14 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) p_after_sb_0p = be32_to_cpu(p->after_sb_0p); p_after_sb_1p = be32_to_cpu(p->after_sb_1p); p_after_sb_2p = be32_to_cpu(p->after_sb_2p); - p_want_lose = be32_to_cpu(p->want_lose); p_two_primaries = be32_to_cpu(p->two_primaries); + cf = be32_to_cpu(p->conn_flags); + p_want_lose = cf & CF_WANT_LOSE; + + clear_bit(CONN_DRY_RUN, &mdev->flags); + + if (cf & CF_DRY_RUN) + set_bit(CONN_DRY_RUN, &mdev->flags); if (p_proto != mdev->net_conf->wire_protocol) { dev_err(DEV, "incompatible communication protocols\n"); @@ -3125,6 +3141,8 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); peer_state.disk = D_DISKLESS; } else { + if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) + return FALSE; D_ASSERT(oconn == C_WF_REPORT_PARAMS); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return FALSE; -- cgit v1.2.3-70-g09d2 From 580b9767dbdf2c049c4d05330c70ea786ef01016 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 26 Feb 2010 23:15:23 +0100 Subject: drbd: fix broken state change after split-brain attach while connected Situation: we have diverging data sets, i.e. we had a split brain somewhen, but currently are connected, one node diskless. Then we try to attach that disk, figure it is consistent, but has a diverging data set, we refuse to attach. This led to strange state changes: 22:18:35 bb drbd1: peer( Unknown -> Primary ) conn( WFReportParams -> Connected) pdsk( DUnknown -> UpToDate ) 22:19:30 bb drbd1: disk( Diskless -> Attaching ) 22:19:30 bb drbd1: disk( Attaching -> Negotiating ) 22:19:30 bb drbd1: drbd_sync_handshake: 22:19:30 bb drbd1: self 97BF25798B9D5222:F33D1F62ADE698DD:4269796F9D027C83:AC45D8B5C3C1BF93 bits:19449 flags:0 22:19:30 bb drbd1: peer 280DFB6E125465D3:F33D1F62ADE698DC:4269796F9D027C82:AC45D8B5C3C1BF93 bits:2575806 flags:0 22:19:30 bb drbd1: uuid_compare()=100 by rule 90 22:19:30 bb drbd1: Split-Brain detected, dropping connection! 22:19:30 bb drbd1: disk( Negotiating -> Diskless ) while the other side says: 22:19:30 aa drbd1: Split-Brain detected, dropping connection! 22:19:30 aa drbd1: Disk attach process on the peer node was aborted. 22:19:30 aa drbd1: conn( Connected -> TOO_LARGE ) pdsk( Diskless -> Consistent ) This should be fixed now. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8bcde4a9632..41f36a9cd40 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2513,6 +2513,10 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } if (hg == -100) { + /* FIXME this log message is not correct if we end up here + * after an attempted attach on a diskless node. + * We just refuse to attach -- well, we drop the "connection" + * to that disk, in a way... */ dev_alert(DEV, "Split-Brain detected, dropping connection!\n"); drbd_khelper(mdev, "split-brain"); return C_MASK; @@ -3134,12 +3138,13 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) put_ldev(mdev); if (nconn == C_MASK) { + nconn = C_CONNECTED; if (mdev->state.disk == D_NEGOTIATING) { drbd_force_state(mdev, NS(disk, D_DISKLESS)); - nconn = C_CONNECTED; } else if (peer_state.disk == D_NEGOTIATING) { dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); peer_state.disk = D_DISKLESS; + real_peer_disk = D_DISKLESS; } else { if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) return FALSE; -- cgit v1.2.3-70-g09d2 From 4589d7f829951c1713ef5a4ad1a9bb563da329b5 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 3 Mar 2010 02:25:33 +0100 Subject: drbd_disconnect: grab meta.socket mutex as well Fixes a race and potential kernel panic if e.g. the worker was just about to send a few P_RS_IS_IN_SYNC via the meta socket for checksum based resync, while the receiver destroys the sockets in drbd_disconnect. To make sure no-one is using the meta socket, it is not enough to stop the asender... Grab the meta socket mutex before destroying it. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 ++++ drivers/block/drbd/drbd_receiver.c | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b2d347d18c7..67e0fc54224 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3173,14 +3173,18 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) void drbd_free_sock(struct drbd_conf *mdev) { if (mdev->data.socket) { + mutex_lock(&mdev->data.mutex); kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR); sock_release(mdev->data.socket); mdev->data.socket = NULL; + mutex_unlock(&mdev->data.mutex); } if (mdev->meta.socket) { + mutex_lock(&mdev->meta.mutex); kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR); sock_release(mdev->meta.socket); mdev->meta.socket = NULL; + mutex_unlock(&mdev->meta.mutex); } } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 41f36a9cd40..d803e6c257e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3617,10 +3617,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) /* asender does not clean up anything. it must not interfere, either */ drbd_thread_stop(&mdev->asender); - - mutex_lock(&mdev->data.mutex); drbd_free_sock(mdev); - mutex_unlock(&mdev->data.mutex); spin_lock_irq(&mdev->req_lock); _drbd_wait_ee_list_empty(mdev, &mdev->active_ee); -- cgit v1.2.3-70-g09d2 From 309d1608cce32903d67d47e7545e232c400b6aa0 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 2 Mar 2010 15:03:44 +0100 Subject: drbd: Reduce the time an empty resync takes usually This mitigates changes introduced with commit: http://git.drbd.org/?p=drbd-8.3.git;a=commit;h=4b6803a3276652da3737 Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_receiver.c | 2 ++ drivers/block/drbd/drbd_worker.c | 12 +++++++++--- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 844206c3185..2d5cebbbf25 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -800,6 +800,7 @@ enum { RESIZE_PENDING, /* Size change detected locally, waiting for the response from * the peer, if it changed there as well. */ CONN_DRY_RUN, /* Expect disconnect after resync handshake. */ + GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */ }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d803e6c257e..ed9f1de24a7 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4074,6 +4074,8 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header *h) { /* restore idle timeout */ mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; + if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) + wake_up(&mdev->misc_wait); return TRUE; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d97a811ad0d..4672f2f37b5 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1289,6 +1289,14 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na) return retcode; } +static void ping_peer(struct drbd_conf *mdev) +{ + clear_bit(GOT_PING_ACK, &mdev->flags); + request_ping(mdev); + wait_event(mdev->misc_wait, + test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED); +} + /** * drbd_start_resync() - Start the resync process * @mdev: DRBD device. @@ -1383,9 +1391,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) if (mdev->rs_total == 0) { /* Peer still reachable? Beware of failing before-resync-target handlers! */ - request_ping(mdev); - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(mdev->net_conf->ping_timeo*HZ/9); /* 9 instead 10 */ + ping_peer(mdev); drbd_resync_finished(mdev); return; } -- cgit v1.2.3-70-g09d2