summaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd/drbd_worker.c
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2011-04-26 10:22:15 +0200
committerJiri Kosina <jkosina@suse.cz>2011-04-26 10:22:59 +0200
commit07f9479a40cc778bc1462ada11f95b01360ae4ff (patch)
tree0676cf38df3844004bb3ebfd99dfa67a4a8998f5 /drivers/block/drbd/drbd_worker.c
parent9d5e6bdb3013acfb311ab407eeca0b6a6a3dedbf (diff)
parentcd2e49e90f1cae7726c9a2c54488d881d7f1cd1c (diff)
Merge branch 'master' into for-next
Fast-forwarded to current state of Linus' tree as there are patches to be applied for files that didn't exist on the old branch.
Diffstat (limited to 'drivers/block/drbd/drbd_worker.c')
-rw-r--r--drivers/block/drbd/drbd_worker.c361
1 files changed, 194 insertions, 167 deletions
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 34f224b018b..f7e6c92f8d0 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -39,18 +39,17 @@
#include "drbd_req.h"
static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
+static int w_make_resync_request(struct drbd_conf *mdev,
+ struct drbd_work *w, int cancel);
-/* defined here:
- drbd_md_io_complete
- drbd_endio_sec
- drbd_endio_pri
-
- * more endio handlers:
- atodb_endio in drbd_actlog.c
- drbd_bm_async_io_complete in drbd_bitmap.c
-
+/* endio handlers:
+ * drbd_md_io_complete (defined here)
+ * drbd_endio_pri (defined here)
+ * drbd_endio_sec (defined here)
+ * bm_async_io_complete (defined in drbd_bitmap.c)
+ *
* For all these callbacks, note the following:
* The callbacks will be called in irq context by the IDE drivers,
* and in Softirqs/Tasklets/BH context by the SCSI drivers.
@@ -94,7 +93,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
if (list_empty(&mdev->read_ee))
wake_up(&mdev->ee_wait);
if (test_bit(__EE_WAS_ERROR, &e->flags))
- __drbd_chk_io_error(mdev, FALSE);
+ __drbd_chk_io_error(mdev, false);
spin_unlock_irqrestore(&mdev->req_lock, flags);
drbd_queue_work(&mdev->data.work, &e->w);
@@ -137,7 +136,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
: list_empty(&mdev->active_ee);
if (test_bit(__EE_WAS_ERROR, &e->flags))
- __drbd_chk_io_error(mdev, FALSE);
+ __drbd_chk_io_error(mdev, false);
spin_unlock_irqrestore(&mdev->req_lock, flags);
if (is_syncer_req)
@@ -163,14 +162,15 @@ void drbd_endio_sec(struct bio *bio, int error)
int uptodate = bio_flagged(bio, BIO_UPTODATE);
int is_write = bio_data_dir(bio) == WRITE;
- if (error)
+ if (error && __ratelimit(&drbd_ratelimit_state))
dev_warn(DEV, "%s: error=%d s=%llus\n",
is_write ? "write" : "read", error,
(unsigned long long)e->sector);
if (!error && !uptodate) {
- dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
- is_write ? "write" : "read",
- (unsigned long long)e->sector);
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
+ is_write ? "write" : "read",
+ (unsigned long long)e->sector);
/* strange behavior of some lower level drivers...
* fail the request by clearing the uptodate flag,
* but do not return any error?! */
@@ -250,13 +250,6 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return w_send_read_req(mdev, w, 0);
}
-int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
- ERR_IF(cancel) return 1;
- dev_err(DEV, "resync inactive, but callback triggered??\n");
- return 1; /* Simply ignore this! */
-}
-
void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest)
{
struct hash_desc desc;
@@ -355,7 +348,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
if (!get_ldev(mdev))
return -EIO;
- if (drbd_rs_should_slow_down(mdev))
+ if (drbd_rs_should_slow_down(mdev, sector))
goto defer;
/* GFP_TRY, because if there is no memory available right now, this may
@@ -373,9 +366,10 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
return 0;
- /* drbd_submit_ee currently fails for one reason only:
- * not being able to allocate enough bios.
- * Is dropping the connection going to help? */
+ /* If it failed because of ENOMEM, retry should help. If it failed
+ * because bio_add_page failed (probably broken lower level driver),
+ * retry may or may not help.
+ * If it does not, you may need to force disconnect. */
spin_lock_irq(&mdev->req_lock);
list_del(&e->w.list);
spin_unlock_irq(&mdev->req_lock);
@@ -386,26 +380,25 @@ defer:
return -EAGAIN;
}
-void resync_timer_fn(unsigned long data)
+int w_resync_timer(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
- struct drbd_conf *mdev = (struct drbd_conf *) data;
- int queue;
-
- queue = 1;
switch (mdev->state.conn) {
case C_VERIFY_S:
- mdev->resync_work.cb = w_make_ov_request;
+ w_make_ov_request(mdev, w, cancel);
break;
case C_SYNC_TARGET:
- mdev->resync_work.cb = w_make_resync_request;
+ w_make_resync_request(mdev, w, cancel);
break;
- default:
- queue = 0;
- mdev->resync_work.cb = w_resync_inactive;
}
- /* harmless race: list_empty outside data.work.q_lock */
- if (list_empty(&mdev->resync_work.list) && queue)
+ return 1;
+}
+
+void resync_timer_fn(unsigned long data)
+{
+ struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+ if (list_empty(&mdev->resync_work.list))
drbd_queue_work(&mdev->data.work, &mdev->resync_work);
}
@@ -438,7 +431,7 @@ static void fifo_add_val(struct fifo_buffer *fb, int value)
fb->values[i] += value;
}
-int drbd_rs_controller(struct drbd_conf *mdev)
+static int drbd_rs_controller(struct drbd_conf *mdev)
{
unsigned int sect_in; /* Number of sectors that came in since the last turn */
unsigned int want; /* The number of sectors we want in the proxy */
@@ -492,29 +485,36 @@ int drbd_rs_controller(struct drbd_conf *mdev)
return req_sect;
}
-int w_make_resync_request(struct drbd_conf *mdev,
- struct drbd_work *w, int cancel)
+static int drbd_rs_number_requests(struct drbd_conf *mdev)
+{
+ int number;
+ if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
+ number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
+ mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
+ } else {
+ mdev->c_sync_rate = mdev->sync_conf.rate;
+ number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
+ }
+
+ /* ignore the amount of pending requests, the resync controller should
+ * throttle down to incoming reply rate soon enough anyways. */
+ return number;
+}
+
+static int w_make_resync_request(struct drbd_conf *mdev,
+ struct drbd_work *w, int cancel)
{
unsigned long bit;
sector_t sector;
const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
- int max_segment_size;
- int number, rollback_i, size, pe, mx;
+ int max_bio_size;
+ int number, rollback_i, size;
int align, queued, sndbuf;
int i = 0;
if (unlikely(cancel))
return 1;
- if (unlikely(mdev->state.conn < C_CONNECTED)) {
- dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected");
- return 0;
- }
-
- if (mdev->state.conn != C_SYNC_TARGET)
- dev_err(DEV, "%s in w_make_resync_request\n",
- drbd_conn_str(mdev->state.conn));
-
if (mdev->rs_total == 0) {
/* empty resync? */
drbd_resync_finished(mdev);
@@ -527,49 +527,19 @@ int w_make_resync_request(struct drbd_conf *mdev,
to continue resync with a broken disk makes no sense at
all */
dev_err(DEV, "Disk broke down during resync!\n");
- mdev->resync_work.cb = w_resync_inactive;
return 1;
}
/* starting with drbd 8.3.8, we can handle multi-bio EEs,
* if it should be necessary */
- max_segment_size =
- mdev->agreed_pro_version < 94 ? queue_max_segment_size(mdev->rq_queue) :
- mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_SEGMENT_SIZE;
+ max_bio_size =
+ mdev->agreed_pro_version < 94 ? queue_max_hw_sectors(mdev->rq_queue) << 9 :
+ mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_BIO_SIZE;
- if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
- number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
- mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
- } else {
- mdev->c_sync_rate = mdev->sync_conf.rate;
- number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
- }
-
- /* Throttle resync on lower level disk activity, which may also be
- * caused by application IO on Primary/SyncTarget.
- * Keep this after the call to drbd_rs_controller, as that assumes
- * to be called as precisely as possible every SLEEP_TIME,
- * and would be confused otherwise. */
- if (drbd_rs_should_slow_down(mdev))
+ number = drbd_rs_number_requests(mdev);
+ if (number == 0)
goto requeue;
- mutex_lock(&mdev->data.mutex);
- if (mdev->data.socket)
- mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req);
- else
- mx = 1;
- mutex_unlock(&mdev->data.mutex);
-
- /* For resync rates >160MB/sec, allow more pending RS requests */
- if (number > mx)
- mx = number;
-
- /* Limit the number of pending RS requests to no more than the peer's receive buffer */
- pe = atomic_read(&mdev->rs_pending_cnt);
- if ((pe + number) > mx) {
- number = mx - pe;
- }
-
for (i = 0; i < number; i++) {
/* Stop generating RS requests, when half of the send buffer is filled */
mutex_lock(&mdev->data.mutex);
@@ -588,16 +558,16 @@ next_sector:
size = BM_BLOCK_SIZE;
bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
- if (bit == -1UL) {
+ if (bit == DRBD_END_OF_BITMAP) {
mdev->bm_resync_fo = drbd_bm_bits(mdev);
- mdev->resync_work.cb = w_resync_inactive;
put_ldev(mdev);
return 1;
}
sector = BM_BIT_TO_SECT(bit);
- if (drbd_try_rs_begin_io(mdev, sector)) {
+ if (drbd_rs_should_slow_down(mdev, sector) ||
+ drbd_try_rs_begin_io(mdev, sector)) {
mdev->bm_resync_fo = bit;
goto requeue;
}
@@ -608,7 +578,7 @@ next_sector:
goto next_sector;
}
-#if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE
+#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
/* try to find some adjacent bits.
* we stop if we have already the maximum req size.
*
@@ -618,7 +588,7 @@ next_sector:
align = 1;
rollback_i = i;
for (;;) {
- if (size + BM_BLOCK_SIZE > max_segment_size)
+ if (size + BM_BLOCK_SIZE > max_bio_size)
break;
/* Be always aligned */
@@ -685,7 +655,6 @@ next_sector:
* resync data block, and the last bit is cleared.
* until then resync "work" is "inactive" ...
*/
- mdev->resync_work.cb = w_resync_inactive;
put_ldev(mdev);
return 1;
}
@@ -706,27 +675,18 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
if (unlikely(cancel))
return 1;
- if (unlikely(mdev->state.conn < C_CONNECTED)) {
- dev_err(DEV, "Confused in w_make_ov_request()! cstate < Connected");
- return 0;
- }
-
- number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
- if (atomic_read(&mdev->rs_pending_cnt) > number)
- goto requeue;
-
- number -= atomic_read(&mdev->rs_pending_cnt);
+ number = drbd_rs_number_requests(mdev);
sector = mdev->ov_position;
for (i = 0; i < number; i++) {
if (sector >= capacity) {
- mdev->resync_work.cb = w_resync_inactive;
return 1;
}
size = BM_BLOCK_SIZE;
- if (drbd_try_rs_begin_io(mdev, sector)) {
+ if (drbd_rs_should_slow_down(mdev, sector) ||
+ drbd_try_rs_begin_io(mdev, sector)) {
mdev->ov_position = sector;
goto requeue;
}
@@ -744,11 +704,33 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
mdev->ov_position = sector;
requeue:
+ mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
return 1;
}
+void start_resync_timer_fn(unsigned long data)
+{
+ struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+ drbd_queue_work(&mdev->data.work, &mdev->start_resync_work);
+}
+
+int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
+ dev_warn(DEV, "w_start_resync later...\n");
+ mdev->start_resync_timer.expires = jiffies + HZ/10;
+ add_timer(&mdev->start_resync_timer);
+ return 1;
+ }
+
+ drbd_start_resync(mdev, C_SYNC_SOURCE);
+ clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags);
+ return 1;
+}
+
int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
kfree(w);
@@ -782,6 +764,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
union drbd_state os, ns;
struct drbd_work *w;
char *khelper_cmd = NULL;
+ int verify_done = 0;
/* Remove all elements from the resync LRU. Since future actions
* might set bits in the (main) bitmap, then the entries in the
@@ -792,9 +775,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
* queue (or even the read operations for those packets
* is not finished by now). Retry in 100ms. */
- drbd_kick_lo(mdev);
- __set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ / 10);
+ schedule_timeout_interruptible(HZ / 10);
w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
if (w) {
w->cb = w_resync_finished;
@@ -819,6 +800,8 @@ int drbd_resync_finished(struct drbd_conf *mdev)
spin_lock_irq(&mdev->req_lock);
os = mdev->state;
+ verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
+
/* This protects us against multiple calls (that can happen in the presence
of application IO), and against connectivity loss just before we arrive here. */
if (os.conn <= C_CONNECTED)
@@ -828,8 +811,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
ns.conn = C_CONNECTED;
dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
- (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) ?
- "Online verify " : "Resync",
+ verify_done ? "Online verify " : "Resync",
dt + mdev->rs_paused, mdev->rs_paused, dbdt);
n_oos = drbd_bm_total_weight(mdev);
@@ -887,14 +869,18 @@ int drbd_resync_finished(struct drbd_conf *mdev)
}
}
- drbd_uuid_set_bm(mdev, 0UL);
-
- if (mdev->p_uuid) {
- /* Now the two UUID sets are equal, update what we
- * know of the peer. */
- int i;
- for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
- mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
+ if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
+ /* for verify runs, we don't update uuids here,
+ * so there would be nothing to report. */
+ drbd_uuid_set_bm(mdev, 0UL);
+ drbd_print_uuids(mdev, "updated UUIDs");
+ if (mdev->p_uuid) {
+ /* Now the two UUID sets are equal, update what we
+ * know of the peer. */
+ int i;
+ for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
+ mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
+ }
}
}
@@ -906,15 +892,11 @@ out:
mdev->rs_total = 0;
mdev->rs_failed = 0;
mdev->rs_paused = 0;
- mdev->ov_start_sector = 0;
+ if (verify_done)
+ mdev->ov_start_sector = 0;
drbd_md_sync(mdev);
- if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
- dev_info(DEV, "Writing the whole bitmap\n");
- drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
- }
-
if (khelper_cmd)
drbd_khelper(mdev, khelper_cmd);
@@ -995,7 +977,9 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
put_ldev(mdev);
}
- if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+ if (mdev->state.conn == C_AHEAD) {
+ ok = drbd_send_ack(mdev, P_RS_CANCEL, e);
+ } else if (likely((e->flags & EE_WAS_ERROR) == 0)) {
if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
inc_rs_pending(mdev);
ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
@@ -1097,25 +1081,27 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
if (unlikely(cancel))
goto out;
- if (unlikely((e->flags & EE_WAS_ERROR) != 0))
- goto out;
-
digest_size = crypto_hash_digestsize(mdev->verify_tfm);
- /* FIXME if this allocation fails, online verify will not terminate! */
digest = kmalloc(digest_size, GFP_NOIO);
- if (digest) {
- drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
- inc_rs_pending(mdev);
- ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
- digest, digest_size, P_OV_REPLY);
- if (!ok)
- dec_rs_pending(mdev);
- kfree(digest);
+ if (!digest) {
+ ok = 0; /* terminate the connection in case the allocation failed */
+ goto out;
}
+ if (likely(!(e->flags & EE_WAS_ERROR)))
+ drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
+ else
+ memset(digest, 0, digest_size);
+
+ inc_rs_pending(mdev);
+ ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
+ digest, digest_size, P_OV_REPLY);
+ if (!ok)
+ dec_rs_pending(mdev);
+ kfree(digest);
+
out:
drbd_free_ee(mdev, e);
-
dec_unacked(mdev);
return ok;
@@ -1130,7 +1116,6 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size)
mdev->ov_last_oos_size = size>>9;
}
drbd_set_out_of_sync(mdev, sector, size);
- set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
}
int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
@@ -1166,10 +1151,6 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
eq = !memcmp(digest, di->digest, digest_size);
kfree(digest);
}
- } else {
- ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
- if (__ratelimit(&drbd_ratelimit_state))
- dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
}
dec_unacked(mdev);
@@ -1183,7 +1164,13 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
drbd_free_ee(mdev, e);
- if (--mdev->ov_left == 0) {
+ --mdev->ov_left;
+
+ /* let's advance progress step marks only for every other megabyte */
+ if ((mdev->ov_left & 0x200) == 0x200)
+ drbd_advance_rs_marks(mdev, mdev->ov_left);
+
+ if (mdev->ov_left == 0) {
ov_oos_print(mdev);
drbd_resync_finished(mdev);
}
@@ -1236,6 +1223,22 @@ int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE);
}
+int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_request *req = container_of(w, struct drbd_request, w);
+ int ok;
+
+ if (unlikely(cancel)) {
+ req_mod(req, send_canceled);
+ return 1;
+ }
+
+ ok = drbd_send_oos(mdev, req);
+ req_mod(req, oos_handed_to_network);
+
+ return ok;
+}
+
/**
* w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
* @mdev: DRBD device.
@@ -1431,6 +1434,17 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na)
return retcode;
}
+void drbd_rs_controller_reset(struct drbd_conf *mdev)
+{
+ atomic_set(&mdev->rs_sect_in, 0);
+ atomic_set(&mdev->rs_sect_ev, 0);
+ mdev->rs_in_flight = 0;
+ mdev->rs_planed = 0;
+ spin_lock(&mdev->peer_seq_lock);
+ fifo_set(&mdev->rs_plan_s, 0);
+ spin_unlock(&mdev->peer_seq_lock);
+}
+
/**
* drbd_start_resync() - Start the resync process
* @mdev: DRBD device.
@@ -1444,13 +1458,18 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
union drbd_state ns;
int r;
- if (mdev->state.conn >= C_SYNC_SOURCE) {
+ if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
dev_err(DEV, "Resync already running!\n");
return;
}
- /* In case a previous resync run was aborted by an IO error/detach on the peer. */
- drbd_rs_cancel_all(mdev);
+ if (mdev->state.conn < C_AHEAD) {
+ /* In case a previous resync run was aborted by an IO error/detach on the peer. */
+ drbd_rs_cancel_all(mdev);
+ /* This should be done when we abort the resync. We definitely do not
+ want to have this for connections going back and forth between
+ Ahead/Behind and SyncSource/SyncTarget */
+ }
if (side == C_SYNC_TARGET) {
/* Since application IO was locked out during C_WF_BITMAP_T and
@@ -1464,6 +1483,20 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
return;
}
+ } else /* C_SYNC_SOURCE */ {
+ r = drbd_khelper(mdev, "before-resync-source");
+ r = (r >> 8) & 0xff;
+ if (r > 0) {
+ if (r == 3) {
+ dev_info(DEV, "before-resync-source handler returned %d, "
+ "ignoring. Old userland tools?", r);
+ } else {
+ dev_info(DEV, "before-resync-source handler returned %d, "
+ "dropping connection.\n", r);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return;
+ }
+ }
}
drbd_state_lock(mdev);
@@ -1473,18 +1506,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
return;
}
- if (side == C_SYNC_TARGET) {
- mdev->bm_resync_fo = 0;
- } else /* side == C_SYNC_SOURCE */ {
- u64 uuid;
-
- get_random_bytes(&uuid, sizeof(u64));
- drbd_uuid_set(mdev, UI_BITMAP, uuid);
- drbd_send_sync_uuid(mdev, uuid);
-
- D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
- }
-
write_lock_irq(&global_state_lock);
ns = mdev->state;
@@ -1522,13 +1543,24 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
_drbd_pause_after(mdev);
}
write_unlock_irq(&global_state_lock);
- put_ldev(mdev);
if (r == SS_SUCCESS) {
dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
drbd_conn_str(ns.conn),
(unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
(unsigned long) mdev->rs_total);
+ if (side == C_SYNC_TARGET)
+ mdev->bm_resync_fo = 0;
+
+ /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
+ * with w_send_oos, or the sync target will get confused as to
+ * how much bits to resync. We cannot do that always, because for an
+ * empty resync and protocol < 95, we need to do it here, as we call
+ * drbd_resync_finished from here in that case.
+ * We drbd_gen_and_send_sync_uuid here for protocol < 96,
+ * and from after_state_ch otherwise. */
+ if (side == C_SYNC_SOURCE && mdev->agreed_pro_version < 96)
+ drbd_gen_and_send_sync_uuid(mdev);
if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) {
/* This still has a race (about when exactly the peers
@@ -1548,13 +1580,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
drbd_resync_finished(mdev);
}
- atomic_set(&mdev->rs_sect_in, 0);
- atomic_set(&mdev->rs_sect_ev, 0);
- mdev->rs_in_flight = 0;
- mdev->rs_planed = 0;
- spin_lock(&mdev->peer_seq_lock);
- fifo_set(&mdev->rs_plan_s, 0);
- spin_unlock(&mdev->peer_seq_lock);
+ drbd_rs_controller_reset(mdev);
/* ns.conn may already be != mdev->state.conn,
* we may have been paused in between, or become paused until
* the timer triggers.
@@ -1564,6 +1590,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
drbd_md_sync(mdev);
}
+ put_ldev(mdev);
drbd_state_unlock(mdev);
}