From 3129b1b9aed15bbebde1b2a5719434273feb295d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 11 Nov 2010 10:47:05 +0100 Subject: drbd: debug: limit nelink-broadcast of request on digest mismatch to 32k We used to be limited to 32k requests, but have increased that limit to 128k now. This part of the code can only deal with 32k, it would scramble arbitrary pages for larger requests. As it is used for debugging only anyways, it is ok to simply truncate the dumped data here. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 8cbfaa687d7..dad559810ed 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2398,10 +2398,11 @@ void drbd_bcast_ee(struct drbd_conf *mdev, tl = tl_add_int(tl, T_ee_sector, &e->sector); tl = tl_add_int(tl, T_ee_block_id, &e->block_id); + /* dump the first 32k */ + len = min_t(unsigned, e->size, 32 << 10); put_unaligned(T_ee_data, tl++); - put_unaligned(e->size, tl++); + put_unaligned(len, tl++); - len = e->size; page = e->pages; page_chain_for_each(page) { void *d = kmap_atomic(page, KM_USER0); @@ -2410,6 +2411,8 @@ void drbd_bcast_ee(struct drbd_conf *mdev, kunmap_atomic(d, KM_USER0); tl = (unsigned short*)((char*)tl + l); len -= l; + if (len == 0) + break; } put_unaligned(TT_END, tl++); /* Close the tag list */ -- cgit v1.2.3-70-g09d2 From 1816a2b47afae838e53a177d5d166cc7be97d6b5 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 11 Nov 2010 15:19:07 +0100 Subject: drbd: properly use max_hw_sectors to limit the our bio size To ease tracking of bios in some hash tables, we want it to not cross certain boundaries (128k, used to be 32k). We limit the maximum bio size using queue parameters. Historically some defines and variables we use there have been named max_segment_size, which was misguided. Rename them to max_bio_size, and use [blk_]queue_max_hw_sectors where appropriate. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 8 ++++---- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 6 +++--- drivers/block/drbd/drbd_nl.c | 27 +++++++++++++-------------- drivers/block/drbd/drbd_receiver.c | 18 +++++++++--------- drivers/block/drbd/drbd_req.c | 8 ++++---- drivers/block/drbd/drbd_worker.c | 12 ++++++------ 7 files changed, 41 insertions(+), 42 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index b3f18545b46..b4adb58c747 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -952,7 +952,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, int wake_up = 0; unsigned long flags; - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); return; @@ -1002,7 +1002,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, /* * this is intended to set one request worth of data out of sync. * affects at least 1 bit, - * and at most 1+DRBD_MAX_SEGMENT_SIZE/BM_BLOCK_SIZE bits. + * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits. * * called by tl_clear and drbd_send_dblock (==drbd_make_request). * so this can be _any_ process. @@ -1015,7 +1015,7 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, unsigned int enr, count; struct lc_element *e; - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "sector: %llus, size: %d\n", (unsigned long long)sector, size); return; @@ -1387,7 +1387,7 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) sector_t esector, nr_sectors; int wake_up = 0; - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); return; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 85207b275e4..bcba2742cfb 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -512,7 +512,7 @@ struct p_sizes { u64 d_size; /* size of disk */ u64 u_size; /* user requested size */ u64 c_size; /* current exported size */ - u32 max_segment_size; /* Maximal size of a BIO */ + u32 max_bio_size; /* Maximal size of a BIO */ u16 queue_order_type; /* not yet implemented in DRBD*/ u16 dds_flags; /* use enum dds_flags here. */ } __packed; @@ -1398,7 +1398,7 @@ struct bm_extent { * With a value of 8 all IO in one 128K block make it to the same slot of the * hash table. */ #define HT_SHIFT 8 -#define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT)) +#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT)) #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 451fc36a85c..9d9c2ed31e9 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1924,7 +1924,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl p.d_size = cpu_to_be64(d_size); p.u_size = cpu_to_be64(u_size); p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); - p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue)); + p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9); p.queue_order_type = cpu_to_be16(q_order_type); p.dds_flags = cpu_to_be16(flags); @@ -2952,7 +2952,7 @@ static void drbd_destroy_mempools(void) static int drbd_create_mempools(void) { struct page *page; - const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count; + const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count; int i; /* prepare our caches and mempools */ @@ -3218,7 +3218,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) q->backing_dev_info.congested_data = mdev; blk_queue_make_request(q, drbd_make_request_26); - blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE); + blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9); blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); blk_queue_merge_bvec(q, drbd_merge_bvec); q->queue_lock = &mdev->req_lock; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index dad559810ed..9e27d82a9a1 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -765,22 +765,21 @@ static int drbd_check_al_size(struct drbd_conf *mdev) return 0; } -void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local) +void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local) { struct request_queue * const q = mdev->rq_queue; struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; int max_segments = mdev->ldev->dc.max_bio_bvecs; + int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); - max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s); - - blk_queue_max_hw_sectors(q, max_seg_s >> 9); - blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); - blk_queue_max_segment_size(q, max_seg_s); blk_queue_logical_block_size(q, 512); - blk_queue_segment_boundary(q, PAGE_SIZE-1); - blk_stack_limits(&q->limits, &b->limits, 0); + blk_queue_max_hw_sectors(q, max_hw_sectors); + /* This is the workaround for "bio would need to, but cannot, be split" */ + blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); + blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1); + blk_queue_stack_limits(q, b); - dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q)); + dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9); if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", @@ -858,7 +857,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp struct block_device *bdev; struct lru_cache *resync_lru = NULL; union drbd_state ns, os; - unsigned int max_seg_s; + unsigned int max_bio_size; int rv; int cp_discovered = 0; int logical_block_size; @@ -1109,20 +1108,20 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp mdev->read_cnt = 0; mdev->writ_cnt = 0; - max_seg_s = DRBD_MAX_SEGMENT_SIZE; + max_bio_size = DRBD_MAX_BIO_SIZE; if (mdev->state.conn == C_CONNECTED) { /* We are Primary, Connected, and now attach a new local * backing store. We must not increase the user visible maximum * bio size on this device to something the peer may not be * able to handle. */ if (mdev->agreed_pro_version < 94) - max_seg_s = queue_max_segment_size(mdev->rq_queue); + max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9; else if (mdev->agreed_pro_version == 94) - max_seg_s = DRBD_MAX_SIZE_H80_PACKET; + max_bio_size = DRBD_MAX_SIZE_H80_PACKET; /* else: drbd 8.3.9 and later, stay with default */ } - drbd_setup_queue_param(mdev, max_seg_s); + drbd_setup_queue_param(mdev, max_bio_size); /* If I am currently not R_PRIMARY, * but meta data primary indicator is set, diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ca213c6e5f9..79e7b57006b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -277,7 +277,7 @@ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net) atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use; int i; - if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) + if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count) i = page_chain_free(page); else { struct page *tmp; @@ -1240,7 +1240,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ data_size -= dgs; ERR_IF(data_size & 0x1ff) return NULL; - ERR_IF(data_size > DRBD_MAX_SEGMENT_SIZE) return NULL; + ERR_IF(data_size > DRBD_MAX_BIO_SIZE) return NULL; /* even though we trust out peer, * we sometimes have to double check. */ @@ -1917,7 +1917,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, (unsigned long long)sector, size); return FALSE; @@ -2897,7 +2897,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned { struct p_sizes *p = &mdev->data.rbuf.sizes; enum determine_dev_size dd = unchanged; - unsigned int max_seg_s; + unsigned int max_bio_size; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ enum dds_flags ddsf; @@ -2970,14 +2970,14 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } if (mdev->agreed_pro_version < 94) - max_seg_s = be32_to_cpu(p->max_segment_size); + max_bio_size = be32_to_cpu(p->max_bio_size); else if (mdev->agreed_pro_version == 94) - max_seg_s = DRBD_MAX_SIZE_H80_PACKET; + max_bio_size = DRBD_MAX_SIZE_H80_PACKET; else /* drbd 8.3.8 onwards */ - max_seg_s = DRBD_MAX_SEGMENT_SIZE; + max_bio_size = DRBD_MAX_BIO_SIZE; - if (max_seg_s != queue_max_segment_size(mdev->rq_queue)) - drbd_setup_queue_param(mdev, max_seg_s); + if (max_bio_size != queue_max_hw_sectors(mdev->rq_queue) << 9) + drbd_setup_queue_param(mdev, max_bio_size); drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type)); put_ldev(mdev); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index ad3fc6228f2..08f53ce9b88 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1047,7 +1047,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) /* can this bio be split generically? * Maybe add our own split-arbitrary-bios function. */ - if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_SEGMENT_SIZE) { + if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) { /* rather error out here than BUG in bio_split */ dev_err(DEV, "bio would need to, but cannot, be split: " "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n", @@ -1098,7 +1098,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) } /* This is called by bio_add_page(). With this function we reduce - * the number of BIOs that span over multiple DRBD_MAX_SEGMENT_SIZEs + * the number of BIOs that span over multiple DRBD_MAX_BIO_SIZEs * units (was AL_EXTENTs). * * we do the calculation within the lower 32bit of the byte offsets, @@ -1118,8 +1118,8 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct unsigned int bio_size = bvm->bi_size; int limit, backing_limit; - limit = DRBD_MAX_SEGMENT_SIZE - - ((bio_offset & (DRBD_MAX_SEGMENT_SIZE-1)) + bio_size); + limit = DRBD_MAX_BIO_SIZE + - ((bio_offset & (DRBD_MAX_BIO_SIZE-1)) + bio_size); if (limit < 0) limit = 0; if (bio_size == 0) { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index af805efc94d..782d87237cb 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -524,7 +524,7 @@ int w_make_resync_request(struct drbd_conf *mdev, unsigned long bit; sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); - int max_segment_size; + int max_bio_size; int number, rollback_i, size; int align, queued, sndbuf; int i = 0; @@ -559,9 +559,9 @@ int w_make_resync_request(struct drbd_conf *mdev, /* starting with drbd 8.3.8, we can handle multi-bio EEs, * if it should be necessary */ - max_segment_size = - mdev->agreed_pro_version < 94 ? queue_max_segment_size(mdev->rq_queue) : - mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_SEGMENT_SIZE; + max_bio_size = + mdev->agreed_pro_version < 94 ? queue_max_hw_sectors(mdev->rq_queue) << 9 : + mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_BIO_SIZE; number = drbd_rs_number_requests(mdev); if (number == 0) @@ -605,7 +605,7 @@ next_sector: goto next_sector; } -#if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE +#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE /* try to find some adjacent bits. * we stop if we have already the maximum req size. * @@ -615,7 +615,7 @@ next_sector: align = 1; rollback_i = i; for (;;) { - if (size + BM_BLOCK_SIZE > max_segment_size) + if (size + BM_BLOCK_SIZE > max_bio_size) break; /* Be always aligned */ -- cgit v1.2.3-70-g09d2 From 422028b1ca4c07995af82a18abced022ff4c296c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Oct 2010 11:12:07 +0200 Subject: drbd: New configuration parameters for dealing with network congestion net { on_congestion {block|pull-ahead|disconnect}; congestion-fill {sectors}; congestion-extents {al-extents}; } Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 7 +++++++ include/linux/drbd.h | 7 +++++++ include/linux/drbd_limits.h | 9 +++++++++ include/linux/drbd_nl.h | 3 +++ 4 files changed, 26 insertions(+) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9e27d82a9a1..f969d8717e2 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1323,6 +1323,8 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, new_conf->wire_protocol = DRBD_PROT_C; new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; + new_conf->on_congestion = DRBD_ON_CONGESTION_DEF; + new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF; if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) { retcode = ERR_MANDATORY_TAG; @@ -1344,6 +1346,11 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } } + if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) { + retcode = ERR_CONG_NOT_PROTO_A; + goto fail; + } + if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { retcode = ERR_DISCARD; goto fail; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index ef44c7a0638..03a08baabf1 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -96,6 +96,12 @@ enum drbd_on_no_data { OND_SUSPEND_IO }; +enum drbd_on_congestion { + OC_BLOCK, + OC_PULL_AHEAD, + OC_DISCONNECT, +}; + /* KEEP the order, do not delete or insert. Only append. */ enum drbd_ret_codes { ERR_CODE_BASE = 100, @@ -146,6 +152,7 @@ enum drbd_ret_codes { ERR_PERM = 152, ERR_NEED_APV_93 = 153, ERR_STONITH_AND_PROT_A = 154, + ERR_CONG_NOT_PROTO_A = 155, /* insert new ones above this line */ AFTER_LAST_ERR_CODE diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 4ac33f34b77..abf418724e5 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -129,6 +129,7 @@ #define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR +#define DRBD_ON_CONGESTION_DEF OC_BLOCK #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 @@ -154,5 +155,13 @@ #define DRBD_C_MIN_RATE_MAX (4 << 20) #define DRBD_C_MIN_RATE_DEF 4096 +#define DRBD_CONG_FILL_MIN 0 +#define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */ +#define DRBD_CONG_FILL_DEF 0 + +#define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN +#define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX +#define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF + #undef RANGE #endif diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index ade91107c9a..8cde3945d1f 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -56,6 +56,9 @@ NL_PACKET(net_conf, 5, NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict) NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo) NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size) + NL_INTEGER( 81, T_MAY_IGNORE, on_congestion) + NL_INTEGER( 82, T_MAY_IGNORE, cong_fill) + NL_INTEGER( 83, T_MAY_IGNORE, cong_extents) /* 59 addr_family was available in GIT, never released */ NL_BIT( 60, T_MANDATORY, mind_af) NL_BIT( 27, T_MAY_IGNORE, want_lose) -- cgit v1.2.3-70-g09d2 From 3e3a7766c2e6995ac98e7855017abc3544d54e08 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 24 Nov 2010 10:41:45 +0100 Subject: drbd: use kzalloc and memset(,0,) to start with clean buffers in drbd_nl Make sure we start with clean buffers to not accidentally send garbage back to userspace. Note: has not been observed; but just in case. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f969d8717e2..80a389d24cd 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2211,7 +2211,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms reply_size += cm->reply_body_size; /* allocation not in the IO path, cqueue thread context */ - cn_reply = kmalloc(reply_size, GFP_KERNEL); + cn_reply = kzalloc(reply_size, GFP_KERNEL); if (!cn_reply) { retcode = ERR_NOMEM; goto fail; @@ -2382,7 +2382,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev, /* receiver thread context, which is not in the writeout path (of this node), * but may be in the writeout path of the _other_ node. * GFP_NOIO to avoid potential "distributed deadlock". */ - cn_reply = kmalloc( + cn_reply = kzalloc( sizeof(struct cn_msg)+ sizeof(struct drbd_nl_cfg_reply)+ sizeof(struct dump_ee_tag_len_struct)+ @@ -2517,6 +2517,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code) (struct drbd_nl_cfg_reply *)cn_reply->data; int rr; + memset(buffer, 0, sizeof(buffer)); cn_reply->id = req->id; cn_reply->seq = req->seq; -- cgit v1.2.3-70-g09d2 From 42ff269d1022a86be4f526cf674998c47b7ab856 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 24 Nov 2010 10:11:14 +0100 Subject: drbd: add packet_type 27 (return_code_only) to netlink api In case we ever should add an other packet type, we must not reuse 27, as that currently used for "empty" return code only replies. Document it as such. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 6 ++++-- include/linux/drbd_nl.h | 6 +++++- include/linux/drbd_tag_magic.h | 1 + 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 80a389d24cd..6a6dde6c51c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2195,7 +2195,8 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms goto fail; } - if (nlp->packet_type >= P_nl_after_last_packet) { + if (nlp->packet_type >= P_nl_after_last_packet || + nlp->packet_type == P_return_code_only) { retcode = ERR_PACKET_NR; goto fail; } @@ -2219,7 +2220,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms reply = (struct drbd_nl_cfg_reply *) cn_reply->data; reply->packet_type = - cm->reply_body_size ? nlp->packet_type : P_nl_after_last_packet; + cm->reply_body_size ? nlp->packet_type : P_return_code_only; reply->minor = nlp->drbd_minor; reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */ /* reply->tag_list; might be modified by cm->function. */ @@ -2525,6 +2526,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code) cn_reply->len = sizeof(struct drbd_nl_cfg_reply); cn_reply->flags = 0; + reply->packet_type = P_return_code_only; reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; reply->ret_code = ret_code; diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index 8cde3945d1f..6fc614b06c4 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -146,9 +146,13 @@ NL_PACKET(new_c_uuid, 26, NL_BIT( 63, T_MANDATORY, clear_bm) ) +#ifdef NL_RESPONSE +NL_RESPONSE(return_code_only, 27) +#endif + #undef NL_PACKET #undef NL_INTEGER #undef NL_INT64 #undef NL_BIT #undef NL_STRING - +#undef NL_RESPONSE diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h index fcdff8410e9..f14a165e82d 100644 --- a/include/linux/drbd_tag_magic.h +++ b/include/linux/drbd_tag_magic.h @@ -7,6 +7,7 @@ /* declare packet_type enums */ enum packet_types { #define NL_PACKET(name, number, fields) P_ ## name = number, +#define NL_RESPONSE(name, number) P_ ## name = number, #define NL_INTEGER(pn, pr, member) #define NL_INT64(pn, pr, member) #define NL_BIT(pn, pr, member) -- cgit v1.2.3-70-g09d2 From 2561b9c1f1d63077c41903fc6ad58dc9ec47248b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 3 Dec 2010 15:22:48 +0100 Subject: drbd: --force option for disconnect As the network connection can be lost at any time, a --force option for disconnect is just a matter of completeness. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 15 +++++++++++++++ include/linux/drbd_nl.h | 4 +++- 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 6a6dde6c51c..cd0459f0403 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1531,6 +1531,21 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl struct drbd_nl_cfg_reply *reply) { int retcode; + struct disconnect dc; + + memset(&dc, 0, sizeof(struct disconnect)); + if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) { + retcode = ERR_MANDATORY_TAG; + goto fail; + } + + if (dc.force) { + spin_lock_irq(&mdev->req_lock); + if (mdev->state.conn >= C_WF_CONNECTION) + _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL); + spin_unlock_irq(&mdev->req_lock); + goto done; + } retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED); diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index 6fc614b06c4..ab6159e4fcf 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -69,7 +69,9 @@ NL_PACKET(net_conf, 5, NL_BIT( 70, T_MANDATORY, dry_run) ) -NL_PACKET(disconnect, 6, ) +NL_PACKET(disconnect, 6, + NL_BIT( 84, T_MAY_IGNORE, force) +) NL_PACKET(resize, 7, NL_INT64( 29, T_MAY_IGNORE, resize_size) -- cgit v1.2.3-70-g09d2 From 116676ca621a862a8124969772f4dd61c8b40eee Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 8 Dec 2010 13:33:11 +0100 Subject: drbd: Rename enum drbd_ret_codes to enum drbd_ret_code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 4 ++-- include/linux/drbd.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f43e2aa354a..8d69e3a1b3c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3570,7 +3570,7 @@ void drbd_md_sync(struct drbd_conf *mdev) * @mdev: DRBD device. * @bdev: Device from which the meta data should be read in. * - * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case + * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID. */ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index cd0459f0403..fe336592e53 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -849,7 +849,7 @@ static void drbd_suspend_al(struct drbd_conf *mdev) static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - enum drbd_ret_codes retcode; + enum drbd_ret_code retcode; enum determine_dev_size dd; sector_t max_possible_sectors; sector_t min_md_device_sectors; @@ -1278,7 +1278,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { int i, ns; - enum drbd_ret_codes retcode; + enum drbd_ret_code retcode; struct net_conf *new_conf = NULL; struct crypto_hash *tfm = NULL; struct crypto_hash *integrity_w_tfm = NULL; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 41da654cc0b..d92f989036c 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -103,7 +103,7 @@ enum drbd_on_congestion { }; /* KEEP the order, do not delete or insert. Only append. */ -enum drbd_ret_codes { +enum drbd_ret_code { ERR_CODE_BASE = 100, NO_ERROR = 101, ERR_LOCAL_ADDR = 102, -- cgit v1.2.3-70-g09d2 From bf885f8a6772fb48409dd505a09d974a5e621f22 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 8 Dec 2010 00:39:32 +0100 Subject: drbd: Be more explicit about functions that return an enum drbd_state_rv Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 32 ++++++++++++--------- drivers/block/drbd/drbd_main.c | 59 +++++++++++++++++++++----------------- drivers/block/drbd/drbd_nl.c | 29 ++++++++++--------- drivers/block/drbd/drbd_receiver.c | 2 +- 4 files changed, 67 insertions(+), 55 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c81710a4211..749607a494e 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1174,14 +1174,19 @@ enum dds_flags { }; extern void drbd_init_set_defaults(struct drbd_conf *mdev); -extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, - union drbd_state mask, union drbd_state val); +extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, + enum chg_state_flags f, + union drbd_state mask, + union drbd_state val); extern void drbd_force_state(struct drbd_conf *, union drbd_state, union drbd_state); -extern int _drbd_request_state(struct drbd_conf *, union drbd_state, - union drbd_state, enum chg_state_flags); -extern int __drbd_set_state(struct drbd_conf *, union drbd_state, - enum chg_state_flags, struct completion *done); +extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *, + union drbd_state, + union drbd_state, + enum chg_state_flags); +extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state, + enum chg_state_flags, + struct completion *done); extern void print_st_err(struct drbd_conf *, union drbd_state, union drbd_state, int); extern int drbd_thread_start(struct drbd_thread *thi); @@ -1245,7 +1250,7 @@ extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size) extern int drbd_send_bitmap(struct drbd_conf *mdev); extern int _drbd_send_bitmap(struct drbd_conf *mdev); -extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode); +extern int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode); extern void drbd_free_bc(struct drbd_backing_dev *ldev); extern void drbd_mdev_cleanup(struct drbd_conf *mdev); @@ -1493,8 +1498,9 @@ enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); -extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, - int force); +extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, + enum drbd_role new_role, + int force); extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev); extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); @@ -1761,11 +1767,11 @@ static inline void drbd_state_unlock(struct drbd_conf *mdev) wake_up(&mdev->misc_wait); } -static inline int _drbd_set_state(struct drbd_conf *mdev, - union drbd_state ns, enum chg_state_flags flags, - struct completion *done) +static inline enum drbd_state_rv +_drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, + enum chg_state_flags flags, struct completion *done) { - int rv; + enum drbd_state_rv rv; read_lock(&global_state_lock); rv = __drbd_set_state(mdev, ns, flags, done); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index cddf311b742..a101dceb2d4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -473,12 +473,13 @@ static int cl_wide_st_chg(struct drbd_conf *mdev, (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); } -int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, - union drbd_state mask, union drbd_state val) +enum drbd_state_rv +drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, + union drbd_state mask, union drbd_state val) { unsigned long flags; union drbd_state os, ns; - int rv; + enum drbd_state_rv rv; spin_lock_irqsave(&mdev->req_lock, flags); os = mdev->state; @@ -502,9 +503,10 @@ void drbd_force_state(struct drbd_conf *mdev, drbd_change_state(mdev, CS_HARD, mask, val); } -static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns); -static int is_valid_state_transition(struct drbd_conf *, - union drbd_state, union drbd_state); +static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); +static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, + union drbd_state, + union drbd_state); static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, const char **warn_sync_abort); int drbd_send_state_req(struct drbd_conf *, @@ -516,7 +518,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, { union drbd_state os, ns; unsigned long flags; - int rv; + enum drbd_state_rv rv; if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) return SS_CW_SUCCESS; @@ -537,7 +539,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, if (rv == SS_SUCCESS) { rv = is_valid_state_transition(mdev, ns, os); if (rv == SS_SUCCESS) - rv = 0; /* cont waiting, otherwise fail. */ + rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ } } spin_unlock_irqrestore(&mdev->req_lock, flags); @@ -555,14 +557,14 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, * Should not be called directly, use drbd_request_state() or * _drbd_request_state(). */ -static int drbd_req_state(struct drbd_conf *mdev, - union drbd_state mask, union drbd_state val, - enum chg_state_flags f) +static enum drbd_state_rv +drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, + union drbd_state val, enum chg_state_flags f) { struct completion done; unsigned long flags; union drbd_state os, ns; - int rv; + enum drbd_state_rv rv; init_completion(&done); @@ -637,10 +639,11 @@ abort: * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE * flag, or when logging of failed state change requests is not desired. */ -int _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, - union drbd_state val, enum chg_state_flags f) +enum drbd_state_rv +_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, + union drbd_state val, enum chg_state_flags f) { - int rv; + enum drbd_state_rv rv; wait_event(mdev->state_wait, (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); @@ -664,8 +667,8 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) ); } -void print_st_err(struct drbd_conf *mdev, - union drbd_state os, union drbd_state ns, int err) +void print_st_err(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum drbd_state_rv err) { if (err == SS_IN_TRANSIENT_STATE) return; @@ -680,12 +683,13 @@ void print_st_err(struct drbd_conf *mdev, * @mdev: DRBD device. * @ns: State to consider. */ -static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) +static enum drbd_state_rv +is_valid_state(struct drbd_conf *mdev, union drbd_state ns) { /* See drbd_state_sw_errors in drbd_strings.c */ enum drbd_fencing_p fp; - int rv = SS_SUCCESS; + enum drbd_state_rv rv = SS_SUCCESS; fp = FP_DONT_CARE; if (get_ldev(mdev)) { @@ -748,10 +752,11 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) * @ns: new state. * @os: old state. */ -static int is_valid_state_transition(struct drbd_conf *mdev, - union drbd_state ns, union drbd_state os) +static enum drbd_state_rv +is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, + union drbd_state os) { - int rv = SS_SUCCESS; + enum drbd_state_rv rv = SS_SUCCESS; if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && os.conn > C_CONNECTED) @@ -1029,12 +1034,12 @@ static void drbd_resume_al(struct drbd_conf *mdev) * * Caller needs to hold req_lock, and global_state_lock. Do not call directly. */ -int __drbd_set_state(struct drbd_conf *mdev, - union drbd_state ns, enum chg_state_flags flags, - struct completion *done) +enum drbd_state_rv +__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, + enum chg_state_flags flags, struct completion *done) { union drbd_state os; - int rv = SS_SUCCESS; + enum drbd_state_rv rv = SS_SUCCESS; const char *warn_sync_abort = NULL; struct after_state_chg_work *ascw; @@ -2031,7 +2036,7 @@ int drbd_send_state_req(struct drbd_conf *mdev, (struct p_header80 *)&p, sizeof(p)); } -int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) +int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) { struct p_req_state_reply p; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index fe336592e53..7a2faf6d48e 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -288,10 +288,11 @@ void drbd_try_outdate_peer_async(struct drbd_conf *mdev) dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n"); } -int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) +enum drbd_state_rv +drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) { const int max_tries = 4; - int r = 0; + enum drbd_state_rv rv = SS_UNKNOWN_ERROR; int try = 0; int forced = 0; union drbd_state mask, val; @@ -306,17 +307,17 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) val.i = 0; val.role = new_role; while (try++ < max_tries) { - r = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE); + rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE); /* in case we first succeeded to outdate, * but now suddenly could establish a connection */ - if (r == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) { + if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) { val.pdsk = 0; mask.pdsk = 0; continue; } - if (r == SS_NO_UP_TO_DATE_DISK && force && + if (rv == SS_NO_UP_TO_DATE_DISK && force && (mdev->state.disk < D_UP_TO_DATE && mdev->state.disk >= D_INCONSISTENT)) { mask.disk = D_MASK; @@ -325,7 +326,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) continue; } - if (r == SS_NO_UP_TO_DATE_DISK && + if (rv == SS_NO_UP_TO_DATE_DISK && mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) { D_ASSERT(mdev->state.pdsk == D_UNKNOWN); nps = drbd_try_outdate_peer(mdev); @@ -341,9 +342,9 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) continue; } - if (r == SS_NOTHING_TO_DO) + if (rv == SS_NOTHING_TO_DO) goto fail; - if (r == SS_PRIMARY_NOP && mask.pdsk == 0) { + if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { nps = drbd_try_outdate_peer(mdev); if (force && nps > D_OUTDATED) { @@ -356,7 +357,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) continue; } - if (r == SS_TWO_PRIMARIES) { + if (rv == SS_TWO_PRIMARIES) { /* Maybe the peer is detected as dead very soon... retry at most once more in this case. */ __set_current_state(TASK_INTERRUPTIBLE); @@ -365,16 +366,16 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) try = max_tries - 1; continue; } - if (r < SS_SUCCESS) { - r = _drbd_request_state(mdev, mask, val, + if (rv < SS_SUCCESS) { + rv = _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_WAIT_COMPLETE); - if (r < SS_SUCCESS) + if (rv < SS_SUCCESS) goto fail; } break; } - if (r < SS_SUCCESS) + if (rv < SS_SUCCESS) goto fail; if (forced) @@ -423,7 +424,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); fail: mutex_unlock(&mdev->state_mutex); - return r; + return rv; } static struct drbd_conf *ensure_mdev(int minor, int create) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d34d093278c..c116cbdc9dc 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3112,7 +3112,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi { struct p_req_state *p = &mdev->data.rbuf.req_state; union drbd_state mask, val; - int rv; + enum drbd_state_rv rv; mask.i = be32_to_cpu(p->mask); val.i = be32_to_cpu(p->val); -- cgit v1.2.3-70-g09d2 From 81e84650c200de0695372461964dd960365696db Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 9 Dec 2010 15:03:57 +0100 Subject: drbd: Use the standard bool, true, and false keywords Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 8 +-- drivers/block/drbd/drbd_bitmap.c | 4 +- drivers/block/drbd/drbd_int.h | 13 +--- drivers/block/drbd/drbd_main.c | 30 ++++----- drivers/block/drbd/drbd_nl.c | 4 +- drivers/block/drbd/drbd_receiver.c | 128 ++++++++++++++++++------------------- drivers/block/drbd/drbd_req.c | 4 +- drivers/block/drbd/drbd_worker.c | 4 +- 8 files changed, 94 insertions(+), 101 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 2e8a95ce79b..e3f0f4d31d7 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -338,7 +338,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) + mdev->ldev->md.al_offset + mdev->al_tr_pos; if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) - drbd_chk_io_error(mdev, 1, TRUE); + drbd_chk_io_error(mdev, 1, true); if (++mdev->al_tr_pos > div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) @@ -528,7 +528,7 @@ static void atodb_endio(struct bio *bio, int error) if (!error && !uptodate) error = -EIO; - drbd_chk_io_error(mdev, error, TRUE); + drbd_chk_io_error(mdev, error, true); if (error && wc->error == 0) wc->error = error; @@ -991,7 +991,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, if (count && get_ldev(mdev)) { drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev)); spin_lock_irqsave(&mdev->al_lock, flags); - drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE); + drbd_try_clear_on_disk_bm(mdev, sector, count, true); spin_unlock_irqrestore(&mdev->al_lock, flags); /* just wake_up unconditional now, various lc_chaged(), @@ -1441,7 +1441,7 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) mdev->rs_failed += count; if (get_ldev(mdev)) { - drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE); + drbd_try_clear_on_disk_bm(mdev, sector, count, false); put_ldev(mdev); } diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 5dafbabe961..9390e952678 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -844,7 +844,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); - drbd_chk_io_error(mdev, 1, TRUE); + drbd_chk_io_error(mdev, 1, true); err = -EIO; } @@ -916,7 +916,7 @@ int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(lo dev_err(DEV, "IO ERROR writing bitmap sector %lu " "(meta-disk sector %llus)\n", enr, (unsigned long long)on_disk_sector); - drbd_chk_io_error(mdev, 1, TRUE); + drbd_chk_io_error(mdev, 1, true); for (i = 0; i < AL_EXT_PER_BM_SECT; i++) drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i); } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 749607a494e..0855934e19d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -72,13 +72,6 @@ extern int fault_devs; extern char usermode_helper[]; -#ifndef TRUE -#define TRUE 1 -#endif -#ifndef FALSE -#define FALSE 0 -#endif - /* I don't remember why XCPU ... * This is used to wake the asender, * and to interrupt sending the sending task @@ -2002,17 +1995,17 @@ static inline int drbd_send_ping_ack(struct drbd_conf *mdev) static inline void drbd_thread_stop(struct drbd_thread *thi) { - _drbd_thread_stop(thi, FALSE, TRUE); + _drbd_thread_stop(thi, false, true); } static inline void drbd_thread_stop_nowait(struct drbd_thread *thi) { - _drbd_thread_stop(thi, FALSE, FALSE); + _drbd_thread_stop(thi, false, false); } static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) { - _drbd_thread_stop(thi, TRUE, FALSE); + _drbd_thread_stop(thi, true, false); } /* counts how many answer packets packets we expect from our peer, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a101dceb2d4..7eb447d20cc 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -456,7 +456,7 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) } /** - * cl_wide_st_chg() - TRUE if the state change is a cluster wide one + * cl_wide_st_chg() - true if the state change is a cluster wide one * @mdev: DRBD device. * @os: old (current) state. * @ns: new (wanted) state. @@ -1623,7 +1623,7 @@ int drbd_thread_start(struct drbd_thread *thi) if (!try_module_get(THIS_MODULE)) { dev_err(DEV, "Failed to get module reference in drbd_thread_start\n"); spin_unlock_irqrestore(&thi->t_lock, flags); - return FALSE; + return false; } init_completion(&thi->stop); @@ -1640,7 +1640,7 @@ int drbd_thread_start(struct drbd_thread *thi) dev_err(DEV, "Couldn't start thread\n"); module_put(THIS_MODULE); - return FALSE; + return false; } spin_lock_irqsave(&thi->t_lock, flags); thi->task = nt; @@ -1660,7 +1660,7 @@ int drbd_thread_start(struct drbd_thread *thi) break; } - return TRUE; + return true; } @@ -1758,8 +1758,8 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, { int sent, ok; - ERR_IF(!h) return FALSE; - ERR_IF(!size) return FALSE; + ERR_IF(!h) return false; + ERR_IF(!size) return false; h->magic = BE_DRBD_MAGIC; h->command = cpu_to_be16(cmd); @@ -2196,14 +2196,14 @@ int _drbd_send_bitmap(struct drbd_conf *mdev) struct p_header80 *p; int ret; - ERR_IF(!mdev->bitmap) return FALSE; + ERR_IF(!mdev->bitmap) return false; /* maybe we should use some per thread scratch page, * and allocate that during initial device creation? */ p = (struct p_header80 *) __get_free_page(GFP_NOIO); if (!p) { dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); - return FALSE; + return false; } if (get_ldev(mdev)) { @@ -2256,7 +2256,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) p.set_size = cpu_to_be32(set_size); if (mdev->state.conn < C_CONNECTED) - return FALSE; + return false; ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, (struct p_header80 *)&p, sizeof(p)); return ok; @@ -2284,7 +2284,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) - return FALSE; + return false; ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, (struct p_header80 *)&p, sizeof(p)); return ok; @@ -2390,8 +2390,8 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) } /* called on sndtimeo - * returns FALSE if we should retry, - * TRUE if we think connection is dead + * returns false if we should retry, + * true if we think connection is dead */ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock) { @@ -2404,7 +2404,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * || mdev->state.conn < C_CONNECTED; if (drop_it) - return TRUE; + return true; drop_it = !--mdev->ko_count; if (!drop_it) { @@ -3283,7 +3283,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) goto out_no_disk; mdev->vdisk = disk; - set_disk_ro(disk, TRUE); + set_disk_ro(disk, true); disk->queue = q; disk->major = DRBD_MAJOR; @@ -3560,7 +3560,7 @@ void drbd_md_sync(struct drbd_conf *mdev) if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { /* this was a try anyways ... */ dev_err(DEV, "meta data update failed!\n"); - drbd_chk_io_error(mdev, 1, TRUE); + drbd_chk_io_error(mdev, 1, true); } /* Update mdev->ldev->md.la_size_sect, diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 7a2faf6d48e..9e94c348c42 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -385,7 +385,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); if (new_role == R_SECONDARY) { - set_disk_ro(mdev->vdisk, TRUE); + set_disk_ro(mdev->vdisk, true); if (get_ldev(mdev)) { mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; put_ldev(mdev); @@ -395,7 +395,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) mdev->net_conf->want_lose = 0; put_net_conf(mdev); } - set_disk_ro(mdev->vdisk, FALSE); + set_disk_ro(mdev->vdisk, false); if (get_ldev(mdev)) { if (((mdev->state.conn < C_CONNECTED || mdev->state.pdsk <= D_FAILED) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 00dcb1172ca..732aacb46a3 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -725,16 +725,16 @@ static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock) char tb[4]; if (!*sock) - return FALSE; + return false; rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); if (rr > 0 || rr == -EAGAIN) { - return TRUE; + return true; } else { sock_release(*sock); *sock = NULL; - return FALSE; + return false; } } @@ -933,7 +933,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi r = drbd_recv(mdev, h, sizeof(*h)); if (unlikely(r != sizeof(*h))) { dev_err(DEV, "short read expecting header on sock: r=%d\n", r); - return FALSE; + return false; } if (likely(h->h80.magic == BE_DRBD_MAGIC)) { @@ -947,11 +947,11 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi be32_to_cpu(h->h80.magic), be16_to_cpu(h->h80.command), be16_to_cpu(h->h80.length)); - return FALSE; + return false; } mdev->last_received = jiffies; - return TRUE; + return true; } static void drbd_flush(struct drbd_conf *mdev) @@ -1160,7 +1160,7 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign switch (mdev->write_ordering) { case WO_none: if (rv == FE_RECYCLED) - return TRUE; + return true; /* receiver context, in the writeout path of the other node. * avoid potential distributed deadlock */ @@ -1188,10 +1188,10 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign D_ASSERT(atomic_read(&epoch->active) == 0); D_ASSERT(epoch->flags == 0); - return TRUE; + return true; default: dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering); - return FALSE; + return false; } epoch->flags = 0; @@ -1209,7 +1209,7 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign } spin_unlock(&mdev->epoch_lock); - return TRUE; + return true; } /* used from receive_RSDataReply (recv_resync_read) @@ -1303,7 +1303,7 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) void *data; if (!data_size) - return TRUE; + return true; page = drbd_pp_alloc(mdev, 1, 1); @@ -1426,7 +1426,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si atomic_add(data_size >> 9, &mdev->rs_sect_ev); if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) - return TRUE; + return true; /* drbd_submit_ee currently fails for one reason only: * not being able to allocate enough bios. @@ -1438,7 +1438,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si drbd_free_ee(mdev, e); fail: put_ldev(mdev); - return FALSE; + return false; } static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) @@ -1455,7 +1455,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi spin_unlock_irq(&mdev->req_lock); if (unlikely(!req)) { dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); - return FALSE; + return false; } /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid @@ -1655,7 +1655,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned e = read_in_block(mdev, p->block_id, sector, data_size); if (!e) { put_ldev(mdev); - return FALSE; + return false; } e->w.cb = e_end_block; @@ -1774,7 +1774,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned put_ldev(mdev); wake_asender(mdev); finish_wait(&mdev->misc_wait, &wait); - return TRUE; + return true; } if (signal_pending(current)) { @@ -1830,7 +1830,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) - return TRUE; + return true; /* drbd_submit_ee currently fails for one reason only: * not being able to allocate enough bios. @@ -1848,7 +1848,7 @@ out_interrupted: * receive a barrier... atomic_inc(&mdev->epoch_size); */ put_ldev(mdev); drbd_free_ee(mdev, e); - return FALSE; + return false; } /* We may throttle resync, if the lower device seems to be busy, @@ -1934,12 +1934,12 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, (unsigned long long)sector, size); - return FALSE; + return false; } if (sector + (size>>9) > capacity) { dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, (unsigned long long)sector, size); - return FALSE; + return false; } if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { @@ -1976,7 +1976,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); if (!e) { put_ldev(mdev); - return FALSE; + return false; } switch (cmd) { @@ -2089,7 +2089,7 @@ submit: spin_unlock_irq(&mdev->req_lock); if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) - return TRUE; + return true; /* drbd_submit_ee currently fails for one reason only: * not being able to allocate enough bios. @@ -2102,7 +2102,7 @@ submit: out_free_e: put_ldev(mdev); drbd_free_ee(mdev, e); - return FALSE; + return false; } static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) @@ -2690,7 +2690,7 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig unsigned char *my_alg = mdev->net_conf->integrity_alg; if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) - return FALSE; + return false; p_integrity_alg[SHARED_SECRET_MAX-1] = 0; if (strcmp(p_integrity_alg, my_alg)) { @@ -2701,11 +2701,11 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig my_alg[0] ? my_alg : (unsigned char *)""); } - return TRUE; + return true; disconnect: drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; + return false; } /* helper function @@ -2737,7 +2737,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size) { - int ok = TRUE; + int ok = true; struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95; unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; @@ -2755,7 +2755,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi if (packet_size > exp_max_sz) { dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", packet_size, exp_max_sz); - return FALSE; + return false; } if (apv <= 88) { @@ -2775,7 +2775,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); if (drbd_recv(mdev, &p->head.payload, header_size) != header_size) - return FALSE; + return false; mdev->sync_conf.rate = be32_to_cpu(p->rate); @@ -2785,11 +2785,11 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi dev_err(DEV, "verify-alg too long, " "peer wants %u, accepting only %u byte\n", data_size, SHARED_SECRET_MAX); - return FALSE; + return false; } if (drbd_recv(mdev, p->verify_alg, data_size) != data_size) - return FALSE; + return false; /* we expect NUL terminated string */ /* but just in case someone tries to be evil */ @@ -2883,7 +2883,7 @@ disconnect: /* but free the verify_tfm again, if csums_tfm did not work out */ crypto_free_hash(verify_tfm); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; + return false; } static void drbd_setup_order_type(struct drbd_conf *mdev, int peer) @@ -2920,7 +2920,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (p_size == 0 && mdev->state.disk == D_DISKLESS) { dev_err(DEV, "some backing storage is needed\n"); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; + return false; } /* just store the peer's disk size for now. @@ -2957,7 +2957,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); mdev->ldev->dc.disk_size = my_usize; put_ldev(mdev); - return FALSE; + return false; } put_ldev(mdev); } @@ -2967,7 +2967,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned dd = drbd_determin_dev_size(mdev, ddsf); put_ldev(mdev); if (dd == dev_size_error) - return FALSE; + return false; drbd_md_sync(mdev); } else { /* I am diskless, need to accept the peer's size. */ @@ -3014,7 +3014,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } } - return TRUE; + return true; } static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) @@ -3038,7 +3038,7 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", (unsigned long long)mdev->ed_uuid); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; + return false; } if (get_ldev(mdev)) { @@ -3073,7 +3073,7 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); - return TRUE; + return true; } /** @@ -3118,7 +3118,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); - return TRUE; + return true; } mask = convert_state(mask); @@ -3129,7 +3129,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi drbd_send_sr_reply(mdev, rv); drbd_md_sync(mdev); - return TRUE; + return true; } static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) @@ -3174,7 +3174,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned peer_state.conn == C_CONNECTED) { if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) drbd_resync_finished(mdev); - return TRUE; + return true; } } @@ -3227,10 +3227,10 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned real_peer_disk = D_DISKLESS; } else { if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) - return FALSE; + return false; D_ASSERT(os.conn == C_WF_REPORT_PARAMS); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; + return false; } } } @@ -3255,7 +3255,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0)); - return FALSE; + return false; } rv = _drbd_set_state(mdev, ns, cs_flags, NULL); ns = mdev->state; @@ -3263,7 +3263,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (rv < SS_SUCCESS) { drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; + return false; } if (os.conn > C_WF_REPORT_PARAMS) { @@ -3281,7 +3281,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ - return TRUE; + return true; } static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) @@ -3308,7 +3308,7 @@ static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsi } else dev_err(DEV, "Ignoring SyncUUID packet!\n"); - return TRUE; + return true; } enum receive_bitmap_ret { OK, DONE, FAILED }; @@ -3462,7 +3462,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne struct bm_xfer_ctx c; void *buffer; enum receive_bitmap_ret ret; - int ok = FALSE; + int ok = false; struct p_header80 *h = &mdev->data.rbuf.header.h80; /* drbd_bm_lock(mdev, "receive bitmap"); By intention no bm_lock */ @@ -3535,7 +3535,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne drbd_conn_str(mdev->state.conn)); } - ok = TRUE; + ok = true; out: /* drbd_bm_unlock(mdev); by intention no lock */ if (ok && mdev->state.conn == C_WF_BITMAP_S) @@ -3569,7 +3569,7 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, u * with the data requests being unplugged */ drbd_tcp_quickack(mdev->data.socket); - return TRUE; + return true; } static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) @@ -3578,7 +3578,7 @@ static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, un drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); - return TRUE; + return true; } typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive); @@ -4147,7 +4147,7 @@ static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h) } wake_up(&mdev->state_wait); - return TRUE; + return true; } static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) @@ -4163,7 +4163,7 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) wake_up(&mdev->misc_wait); - return TRUE; + return true; } static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) @@ -4186,7 +4186,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) dec_rs_pending(mdev); atomic_add(blksize >> 9, &mdev->rs_sect_in); - return TRUE; + return true; } /* when we receive the ACK for a write request, @@ -4230,14 +4230,14 @@ static int validate_req_change_req_state(struct drbd_conf *mdev, if (unlikely(!req)) { spin_unlock_irq(&mdev->req_lock); dev_err(DEV, "%s: got a corrupt block_id/sector pair\n", func); - return FALSE; + return false; } __req_mod(req, what, &m); spin_unlock_irq(&mdev->req_lock); if (m.bio) complete_master_bio(mdev, &m); - return TRUE; + return true; } static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) @@ -4252,7 +4252,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) if (is_syncer_block_id(p->block_id)) { drbd_set_in_sync(mdev, sector, blksize); dec_rs_pending(mdev); - return TRUE; + return true; } switch (be16_to_cpu(h->command)) { case P_RS_WRITE_ACK: @@ -4273,7 +4273,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) break; default: D_ASSERT(0); - return FALSE; + return false; } return validate_req_change_req_state(mdev, p->block_id, sector, @@ -4294,7 +4294,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) int size = be32_to_cpu(p->blksize); dec_rs_pending(mdev); drbd_rs_failed_io(mdev, sector, size); - return TRUE; + return true; } return validate_req_change_req_state(mdev, p->block_id, sector, _ack_id_to_req, __func__ , neg_acked); @@ -4332,7 +4332,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) put_ldev(mdev); } - return TRUE; + return true; } static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) @@ -4349,7 +4349,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) drbd_queue_work_front(&mdev->data.work, w); } - return TRUE; + return true; } static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) @@ -4370,7 +4370,7 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) ov_oos_print(mdev); if (!get_ldev(mdev)) - return TRUE; + return true; drbd_rs_complete_io(mdev, sector); dec_rs_pending(mdev); @@ -4393,12 +4393,12 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) } } put_ldev(mdev); - return TRUE; + return true; } static int got_skip(struct drbd_conf *mdev, struct p_header80 *h) { - return TRUE; + return true; } struct asender_cmd { diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 53e7cc50645..528909090df 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -445,7 +445,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - __drbd_chk_io_error(mdev, FALSE); + __drbd_chk_io_error(mdev, false); _req_may_be_done_not_susp(req, m); put_ldev(mdev); break; @@ -466,7 +466,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(!(req->rq_state & RQ_NET_MASK)); - __drbd_chk_io_error(mdev, FALSE); + __drbd_chk_io_error(mdev, false); put_ldev(mdev); /* no point in retrying if there is no good remote data, diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index e201f6f82c0..9fe3e890da0 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -96,7 +96,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) if (list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); if (test_bit(__EE_WAS_ERROR, &e->flags)) - __drbd_chk_io_error(mdev, FALSE); + __drbd_chk_io_error(mdev, false); spin_unlock_irqrestore(&mdev->req_lock, flags); drbd_queue_work(&mdev->data.work, &e->w); @@ -139,7 +139,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo : list_empty(&mdev->active_ee); if (test_bit(__EE_WAS_ERROR, &e->flags)) - __drbd_chk_io_error(mdev, FALSE); + __drbd_chk_io_error(mdev, false); spin_unlock_irqrestore(&mdev->req_lock, flags); if (is_syncer_req) -- cgit v1.2.3-70-g09d2 From f2024e7ce29f4287395ce879364cd68c7ac226f2 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 10 Dec 2010 13:44:05 +0100 Subject: drbd: drbd_nl_disk_conf: Avoid a compiler warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Warning: comparison between ‘enum drbd_ret_code’ and ‘enum drbd_state_rv’ Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9e94c348c42..ada5483f7e5 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -859,7 +859,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp struct lru_cache *resync_lru = NULL; union drbd_state ns, os; unsigned int max_bio_size; - int rv; + enum drbd_state_rv rv; int cp_discovered = 0; int logical_block_size; @@ -1005,9 +1005,10 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp /* and for any other previously queued work */ drbd_flush_workqueue(mdev); - retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); + rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); + retcode = rv; /* FIXME: Type mismatch. */ drbd_resume_io(mdev); - if (retcode < SS_SUCCESS) + if (rv < SS_SUCCESS) goto fail; if (!get_ldev_if_state(mdev, D_ATTACHING)) -- cgit v1.2.3-70-g09d2 From 24dccabb390412d04435e11cfb535df51def7b2d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 12 Dec 2010 17:45:41 +0100 Subject: drbd: Fix: drbd_bitmap_io does not return an enum determine_dev_size I guess bitmap I/O errors are supposed to cause drbd_determin_dev_size to return dev_size_error. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ada5483f7e5..2f072498214 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -643,11 +643,17 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_ || prev_size != mdev->ldev->md.md_size_sect; if (la_size_changed || md_moved) { + int err; + drbd_al_shrink(mdev); /* All extents inactive. */ dev_info(DEV, "Writing the whole bitmap, %s\n", la_size_changed && md_moved ? "size changed and md moved" : la_size_changed ? "size changed" : "md moved"); - rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */ + err = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */ + if (err) { + rv = dev_size_error; + goto out; + } drbd_md_mark_dirty(mdev); } -- cgit v1.2.3-70-g09d2 From 19f843aa08e2d8f87a09b4c2edc43b00638423a8 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 15 Dec 2010 08:59:11 +0100 Subject: drbd: bitmap keep track of changes vs on-disk bitmap When we set or clear bits in a bitmap page, also set a flag in the page->private pointer. This allows us to skip writes of unchanged pages. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 132 +++---------- drivers/block/drbd/drbd_bitmap.c | 409 +++++++++++++++++++++++++++++---------- drivers/block/drbd/drbd_int.h | 7 +- drivers/block/drbd/drbd_main.c | 27 ++- drivers/block/drbd/drbd_nl.c | 11 +- drivers/block/drbd/drbd_worker.c | 7 +- 6 files changed, 373 insertions(+), 220 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index e3f0f4d31d7..090fc2ce0df 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -262,6 +262,33 @@ void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) spin_unlock_irqrestore(&mdev->al_lock, flags); } +#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT) +/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT + * are still coupled, or assume too much about their relation. + * Code below will not work if this is violated. + * Will be cleaned up with some followup patch. + */ +# error FIXME +#endif + +static unsigned int al_extent_to_bm_page(unsigned int al_enr) +{ + return al_enr >> + /* bit to page */ + ((PAGE_SHIFT + 3) - + /* al extent number to bit */ + (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)); +} + +static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) +{ + return rs_enr >> + /* bit to page */ + ((PAGE_SHIFT + 3) - + /* al extent number to bit */ + (BM_EXT_SHIFT - BM_BLOCK_SHIFT)); +} + int w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) { @@ -289,7 +316,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) * For now, we must not write the transaction, * if we cannot write out the bitmap of the evicted extent. */ if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) - drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT); + drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted)); /* The bitmap write may have failed, causing a state change. */ if (mdev->state.disk < D_INCONSISTENT) { @@ -635,105 +662,6 @@ out_bio_put: return err; } -/** - * drbd_al_to_on_disk_bm() - * Writes bitmap parts covered by active AL extents - * @mdev: DRBD device. - * - * Called when we detach (unconfigure) local storage, - * or when we go from R_PRIMARY to R_SECONDARY role. - */ -void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) -{ - int i, nr_elements; - unsigned int enr; - struct bio **bios; - struct drbd_atodb_wait wc; - - ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING)) - return; /* sorry, I don't have any act_log etc... */ - - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); - - nr_elements = mdev->act_log->nr_elements; - - /* GFP_KERNEL, we are not in anyone's write-out path */ - bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL); - if (!bios) - goto submit_one_by_one; - - atomic_set(&wc.count, 0); - init_completion(&wc.io_done); - wc.mdev = mdev; - wc.error = 0; - - for (i = 0; i < nr_elements; i++) { - enr = lc_element_by_index(mdev->act_log, i)->lc_number; - if (enr == LC_FREE) - continue; - /* next statement also does atomic_inc wc.count and local_cnt */ - if (atodb_prepare_unless_covered(mdev, bios, - enr/AL_EXT_PER_BM_SECT, - &wc)) - goto free_bios_submit_one_by_one; - } - - /* unnecessary optimization? */ - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); - - /* all prepared, submit them */ - for (i = 0; i < nr_elements; i++) { - if (bios[i] == NULL) - break; - if (drbd_insert_fault(mdev, DRBD_FAULT_MD_WR)) { - bios[i]->bi_rw = WRITE; - bio_endio(bios[i], -EIO); - } else { - submit_bio(WRITE, bios[i]); - } - } - - /* always (try to) flush bitmap to stable storage */ - drbd_md_flush(mdev); - - /* In case we did not submit a single IO do not wait for - * them to complete. ( Because we would wait forever here. ) - * - * In case we had IOs and they are already complete, there - * is not point in waiting anyways. - * Therefore this if () ... */ - if (atomic_read(&wc.count)) - wait_for_completion(&wc.io_done); - - put_ldev(mdev); - - kfree(bios); - return; - - free_bios_submit_one_by_one: - /* free everything by calling the endio callback directly. */ - for (i = 0; i < nr_elements && bios[i]; i++) - bio_endio(bios[i], 0); - - kfree(bios); - - submit_one_by_one: - dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n"); - - for (i = 0; i < mdev->act_log->nr_elements; i++) { - enr = lc_element_by_index(mdev->act_log, i)->lc_number; - if (enr == LC_FREE) - continue; - /* Really slow: if we have al-extents 16..19 active, - * sector 4 will be written four times! Synchronous! */ - drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT); - } - - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); - put_ldev(mdev); -} - /** * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents * @mdev: DRBD device. @@ -813,7 +741,7 @@ static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused return 1; } - drbd_bm_write_sect(mdev, udw->enr); + drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr)); put_ldev(mdev); kfree(udw); @@ -893,7 +821,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, dev_warn(DEV, "Kicking resync_lru element enr=%u " "out with rs_failed=%d\n", ext->lce.lc_number, ext->rs_failed); - set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); } ext->rs_left = rs_left; ext->rs_failed = success ? 0 : count; @@ -912,7 +839,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, drbd_queue_work_front(&mdev->data.work, &udw->w); } else { dev_warn(DEV, "Could not kmalloc an udw\n"); - set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); } } } else { diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 8d959ed6c2c..72cd41a96ef 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -70,8 +70,7 @@ struct drbd_bitmap { sector_t bm_dev_capacity; struct mutex bm_change; /* serializes resize operations */ - atomic_t bm_async_io; - wait_queue_head_t bm_io_wait; + wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */ unsigned long bm_flags; @@ -82,7 +81,7 @@ struct drbd_bitmap { /* definition of bits in bm_flags */ #define BM_LOCKED 0 -#define BM_MD_IO_ERROR 1 +// #define BM_MD_IO_ERROR 1 unused now. #define BM_P_VMALLOCED 2 static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, @@ -155,26 +154,117 @@ void drbd_bm_unlock(struct drbd_conf *mdev) mutex_unlock(&b->bm_change); } -static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr) +/* we store some "meta" info about our pages in page->private */ +/* at a granularity of 4k storage per bitmap bit: + * one peta byte storage: 1<<50 byte, 1<<38 * 4k storage blocks + * 1<<38 bits, + * 1<<23 4k bitmap pages. + * Use 24 bits as page index, covers 2 peta byte storage + * at a granularity of 4k per bit. + * Used to report the failed page idx on io error from the endio handlers. + */ +#define BM_PAGE_IDX_MASK ((1UL<<24)-1) +/* this page is currently read in, or written back */ +#define BM_PAGE_IO_LOCK 31 +/* if there has been an IO error for this page */ +#define BM_PAGE_IO_ERROR 30 +/* this is to be able to intelligently skip disk IO, + * set if bits have been set since last IO. */ +#define BM_PAGE_NEED_WRITEOUT 29 +/* to mark for lazy writeout once syncer cleared all clearable bits, + * we if bits have been cleared since last IO. */ +#define BM_PAGE_LAZY_WRITEOUT 28 + +/* store_page_idx uses non-atomic assingment. It is only used directly after + * allocating the page. All other bm_set_page_* and bm_clear_page_* need to + * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap + * changes) may happen from various contexts, and wait_on_bit/wake_up_bit + * requires it all to be atomic as well. */ +static void bm_store_page_idx(struct page *page, unsigned long idx) { - /* page_nr = (bitnr/8) >> PAGE_SHIFT; */ - unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3); - BUG_ON(page_nr >= b->bm_number_of_pages); - return page_nr; + BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); + page_private(page) |= idx; } -/* word offset to long pointer */ -static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km) +static unsigned long bm_page_to_idx(struct page *page) { - struct page *page; - unsigned long page_nr; + return page_private(page) & BM_PAGE_IDX_MASK; +} + +/* As is very unlikely that the same page is under IO from more than one + * context, we can get away with a bit per page and one wait queue per bitmap. + */ +static void bm_page_lock_io(struct drbd_conf *mdev, int page_nr) +{ + struct drbd_bitmap *b = mdev->bitmap; + void *addr = &page_private(b->bm_pages[page_nr]); + wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr)); +} + +static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr) +{ + struct drbd_bitmap *b = mdev->bitmap; + void *addr = &page_private(b->bm_pages[page_nr]); + clear_bit(BM_PAGE_IO_LOCK, addr); + smp_mb__after_clear_bit(); + wake_up(&mdev->bitmap->bm_io_wait); +} + +/* set _before_ submit_io, so it may be reset due to being changed + * while this page is in flight... will get submitted later again */ +static void bm_set_page_unchanged(struct page *page) +{ + /* use cmpxchg? */ + clear_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); + clear_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page)); +} + +static void bm_set_page_need_writeout(struct page *page) +{ + set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); +} + +static int bm_test_page_unchanged(struct page *page) +{ + volatile const unsigned long *addr = &page_private(page); + return (*addr & ((1UL<> PAGE_SHIFT; */ - page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); + unsigned int page_nr = long_nr >> (PAGE_SHIFT - LN2_BPL + 3); BUG_ON(page_nr >= b->bm_number_of_pages); - page = b->bm_pages[page_nr]; + return page_nr; +} - return (unsigned long *) kmap_atomic(page, km); +static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr) +{ + /* page_nr = (bitnr/8) >> PAGE_SHIFT; */ + unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3); + BUG_ON(page_nr >= b->bm_number_of_pages); + return page_nr; } static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km) @@ -188,11 +278,6 @@ static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) return __bm_map_pidx(b, idx, KM_IRQ1); } -static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset) -{ - return __bm_map_paddr(b, offset, KM_IRQ1); -} - static void __bm_unmap(unsigned long *p_addr, const enum km_type km) { kunmap_atomic(p_addr, km); @@ -222,6 +307,7 @@ static void bm_unmap(unsigned long *p_addr) * to be able to report device specific. */ + static void bm_free_pages(struct page **pages, unsigned long number) { unsigned long i; @@ -289,6 +375,9 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) bm_vk_free(new_pages, vmalloced); return NULL; } + /* we want to know which page it is + * from the endio handlers */ + bm_store_page_idx(page, i); new_pages[i] = page; } } else { @@ -443,7 +532,7 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b) while (offset < words) { i = do_now = min_t(size_t, words-offset, LWPP); - p_addr = __bm_map_paddr(b, offset, KM_USER0); + p_addr = __bm_map_pidx(b, bm_word_to_page_idx(b, offset), KM_USER0); bm = p_addr + MLPP(offset); while (i--) { bits += hweight_long(*bm++); @@ -472,6 +561,7 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b) static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) { unsigned long *p_addr, *bm; + unsigned int idx; size_t do_now, end; #define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512) @@ -485,7 +575,8 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) while (offset < end) { do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset; - p_addr = bm_map_paddr(b, offset); + idx = bm_word_to_page_idx(b, offset); + p_addr = bm_map_pidx(b, idx); bm = p_addr + MLPP(offset); if (bm+do_now > p_addr + LWPP) { printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", @@ -494,6 +585,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) } memset(bm, c, do_now * sizeof(long)); bm_unmap(p_addr); + bm_set_page_need_writeout(b->bm_pages[idx]); offset += do_now; } } @@ -604,7 +696,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) bm_free_pages(opages + want, have - want); } - p_addr = bm_map_paddr(b, words); + p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, words)); bm = p_addr + MLPP(words); *bm = DRBD_MAGIC; bm_unmap(p_addr); @@ -616,7 +708,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) bm_vk_free(opages, opages_vmalloced); if (!growing) b->bm_set = bm_count_bits(b); - dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words); + dev_info(DEV, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want); out: drbd_bm_unlock(mdev); @@ -686,6 +778,7 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, struct drbd_bitmap *b = mdev->bitmap; unsigned long *p_addr, *bm; unsigned long word, bits; + unsigned int idx; size_t end, do_now; end = offset + number; @@ -700,7 +793,8 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, spin_lock_irq(&b->bm_lock); while (offset < end) { do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; - p_addr = bm_map_paddr(b, offset); + idx = bm_word_to_page_idx(b, offset); + p_addr = bm_map_pidx(b, idx); bm = p_addr + MLPP(offset); offset += do_now; while (do_now--) { @@ -710,6 +804,7 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, b->bm_set += hweight_long(word) - bits; } bm_unmap(p_addr); + bm_set_page_need_writeout(b->bm_pages[idx]); } /* with 32bit <-> 64bit cross-platform connect * this is only correct for current usage, @@ -748,7 +843,7 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, else { while (offset < end) { do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; - p_addr = bm_map_paddr(b, offset); + p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset)); bm = p_addr + MLPP(offset); offset += do_now; while (do_now--) @@ -786,9 +881,22 @@ void drbd_bm_clear_all(struct drbd_conf *mdev) spin_unlock_irq(&b->bm_lock); } +struct bm_aio_ctx { + struct drbd_conf *mdev; + atomic_t in_flight; + wait_queue_head_t io_wait; + unsigned flags; +#define BM_AIO_COPY_PAGES 1 + int error; +}; + +/* bv_page may be a copy, or may be the original */ static void bm_async_io_complete(struct bio *bio, int error) { - struct drbd_bitmap *b = bio->bi_private; + struct bm_aio_ctx *ctx = bio->bi_private; + struct drbd_conf *mdev = ctx->mdev; + struct drbd_bitmap *b = mdev->bitmap; + unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page); int uptodate = bio_flagged(bio, BIO_UPTODATE); @@ -799,35 +907,79 @@ static void bm_async_io_complete(struct bio *bio, int error) if (!error && !uptodate) error = -EIO; + if (!bm_test_page_unchanged(b->bm_pages[idx])) + dev_info(DEV, "bitmap page idx %u changed during IO!\n", idx); + if (error) { - /* doh. what now? - * for now, set all bits, and flag MD_IO_ERROR */ - __set_bit(BM_MD_IO_ERROR, &b->bm_flags); + /* ctx error will hold the completed-last non-zero error code, + * in case error codes differ. */ + ctx->error = error; + bm_set_page_io_err(b->bm_pages[idx]); + /* Not identical to on disk version of it. + * Is BM_PAGE_IO_ERROR enough? */ + if (__ratelimit(&drbd_ratelimit_state)) + dev_err(DEV, "IO ERROR %d on bitmap page idx %u\n", + error, idx); + } else { + bm_clear_page_io_err(b->bm_pages[idx]); + dynamic_dev_dbg(DEV, "bitmap page idx %u completed\n", idx); } - if (atomic_dec_and_test(&b->bm_async_io)) - wake_up(&b->bm_io_wait); + + bm_page_unlock_io(mdev, idx); + + /* FIXME give back to page pool */ + if (ctx->flags & BM_AIO_COPY_PAGES) + put_page(bio->bi_io_vec[0].bv_page); bio_put(bio); + + if (atomic_dec_and_test(&ctx->in_flight)) + wake_up(&ctx->io_wait); } -static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local) +static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) { /* we are process context. we always get a bio */ struct bio *bio = bio_alloc(GFP_KERNEL, 1); + struct drbd_conf *mdev = ctx->mdev; + struct drbd_bitmap *b = mdev->bitmap; + struct page *page; unsigned int len; + sector_t on_disk_sector = mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset; on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9); /* this might happen with very small - * flexible external meta data device */ + * flexible external meta data device, + * or with PAGE_SIZE > 4k */ len = min_t(unsigned int, PAGE_SIZE, (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9); + /* serialize IO on this page */ + bm_page_lock_io(mdev, page_nr); + /* before memcpy and submit, + * so it can be redirtied any time */ + bm_set_page_unchanged(b->bm_pages[page_nr]); + + if (ctx->flags & BM_AIO_COPY_PAGES) { + /* FIXME alloc_page is good enough for now, but actually needs + * to use pre-allocated page pool */ + void *src, *dest; + page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); + dest = kmap_atomic(page, KM_USER0); + src = kmap_atomic(b->bm_pages[page_nr], KM_USER1); + memcpy(dest, src, PAGE_SIZE); + kunmap_atomic(src, KM_USER1); + kunmap_atomic(dest, KM_USER0); + bm_store_page_idx(page, page_nr); + } else + page = b->bm_pages[page_nr]; + bio->bi_bdev = mdev->ldev->md_bdev; bio->bi_sector = on_disk_sector; - bio_add_page(bio, b->bm_pages[page_nr], len, 0); - bio->bi_private = b; + bio_add_page(bio, page, len, 0); + bio->bi_private = ctx; bio->bi_end_io = bm_async_io_complete; if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { @@ -841,36 +993,72 @@ static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int /* * bm_rw: read/write the whole bitmap from/to its on disk location. */ -static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) +static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local) { + struct bm_aio_ctx ctx = + { .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0 }; struct drbd_bitmap *b = mdev->bitmap; - /* sector_t sector; */ - int bm_words, num_pages, i; + int last_page, i, count = 0; unsigned long now; char ppb[10]; int err = 0; - WARN_ON(!bm_is_locked(b)); - - /* no spinlock here, the drbd_bm_lock should be enough! */ + /* + * We are protected against bitmap disappearing/resizing by holding an + * ldev reference (caller must have called get_ldev()). + * For read/write, we are protected against changes to the bitmap by + * the bitmap lock (see drbd_bitmap_io). + * For lazy writeout, we don't care for ongoing changes to the bitmap, + * as we submit copies of pages anyways. + */ + if (!ctx.flags) + WARN_ON(!bm_is_locked(b)); - bm_words = drbd_bm_words(mdev); - num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT; + /* because of the "extra long to catch oob access" we allocate in + * drbd_bm_resize, bm_number_of_pages -1 is not necessarily the page + * containing the last _relevant_ bitmap word */ + last_page = bm_word_to_page_idx(b, b->bm_words - 1); now = jiffies; - atomic_set(&b->bm_async_io, num_pages); - __clear_bit(BM_MD_IO_ERROR, &b->bm_flags); + ctx.mdev = mdev; + atomic_set(&ctx.in_flight, 1); /* one extra ref */ + init_waitqueue_head(&ctx.io_wait); + ctx.error = 0; /* let the layers below us try to merge these bios... */ - for (i = 0; i < num_pages; i++) - bm_page_io_async(mdev, b, i, rw); + for (i = 0; i <= last_page; i++) { + /* ignore completely unchanged pages */ + if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) + break; + if (rw & WRITE) { + if (bm_test_page_unchanged(b->bm_pages[i])) { + dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); + continue; + } + /* during lazy writeout, + * ignore those pages not marked for lazy writeout. */ + if (lazy_writeout_upper_idx && + !bm_test_page_lazy_writeout(b->bm_pages[i])) { + dynamic_dev_dbg(DEV, "skipped bm lazy write for idx %u\n", i); + continue; + } + } + atomic_inc(&ctx.in_flight); + bm_page_io_async(&ctx, i, rw); + ++count; + cond_resched(); + } - wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0); + atomic_dec(&ctx.in_flight); /* drop the extra ref */ + wait_event(ctx.io_wait, atomic_read(&ctx.in_flight) == 0); + dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", + rw == WRITE ? "WRITE" : "READ", + count, jiffies - now); - if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) { + if (ctx.error) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); drbd_chk_io_error(mdev, 1, true); - err = -EIO; + err = -EIO; /* ctx.error ? */ } now = jiffies; @@ -895,55 +1083,63 @@ static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) */ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) { - return bm_rw(mdev, READ); + return bm_rw(mdev, READ, 0); } /** * drbd_bm_write() - Write the whole bitmap to its on disk location. * @mdev: DRBD device. + * + * Will only write pages that have changed since last IO. */ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) { - return bm_rw(mdev, WRITE); + return bm_rw(mdev, WRITE, 0); } /** - * drbd_bm_write_sect: Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap + * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed. * @mdev: DRBD device. - * @enr: Extent number in the resync lru (happens to be sector offset) + * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages + */ +int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) +{ + return bm_rw(mdev, WRITE, upper_idx); +} + + +/** + * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap + * @mdev: DRBD device. + * @idx: bitmap page index * - * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered - * by a single sector write. Therefore enr == sector offset from the - * start of the bitmap. + * We don't want to special case on logical_block_size of the underlaying + * device, so we submit PAGE_SIZE aligned pieces containing the requested enr. + * Note that on "most" systems, PAGE_SIZE is 4k. */ -int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local) +int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) { - sector_t on_disk_sector = enr + mdev->ldev->md.md_offset - + mdev->ldev->md.bm_offset; - int bm_words, num_words, offset; - int err = 0; + struct bm_aio_ctx ctx = { .flags = BM_AIO_COPY_PAGES, }; - mutex_lock(&mdev->md_io_mutex); - bm_words = drbd_bm_words(mdev); - offset = S2W(enr); /* word offset into bitmap */ - num_words = min(S2W(1), bm_words - offset); - if (num_words < S2W(1)) - memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE); - drbd_bm_get_lel(mdev, offset, num_words, - page_address(mdev->md_io_page)); - if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) { - int i; - err = -EIO; - dev_err(DEV, "IO ERROR writing bitmap sector %lu " - "(meta-disk sector %llus)\n", - enr, (unsigned long long)on_disk_sector); - drbd_chk_io_error(mdev, 1, true); - for (i = 0; i < AL_EXT_PER_BM_SECT; i++) - drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i); + if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { + dev_info(DEV, "skipped bm page write for idx %u\n", idx); + return 0; } + + ctx.mdev = mdev; + atomic_set(&ctx.in_flight, 1); + init_waitqueue_head(&ctx.io_wait); + + bm_page_io_async(&ctx, idx, WRITE_SYNC); + wait_event(ctx.io_wait, atomic_read(&ctx.in_flight) == 0); + + if (ctx.error) + drbd_chk_io_error(mdev, 1, true); + /* that should force detach, so the in memory bitmap will be + * gone in a moment as well. */ + mdev->bm_writ_cnt++; - mutex_unlock(&mdev->md_io_mutex); - return err; + return ctx.error; } /* NOTE @@ -965,10 +1161,9 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); } else { while (bm_fo < b->bm_bits) { - unsigned long offset; - bit_offset = bm_fo & ~BPP_MASK; /* bit offset of the page */ - offset = bit_offset >> LN2_BPL; /* word offset of the page */ - p_addr = __bm_map_paddr(b, offset, km); + /* bit offset of the first bit in the page */ + bit_offset = bm_fo & ~BPP_MASK; + p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km); if (find_zero_bit) i = generic_find_next_zero_le_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); @@ -1048,8 +1243,9 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, struct drbd_bitmap *b = mdev->bitmap; unsigned long *p_addr = NULL; unsigned long bitnr; - unsigned long last_page_nr = -1UL; + unsigned int last_page_nr = -1U; int c = 0; + int changed_total = 0; if (e >= b->bm_bits) { dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n", @@ -1057,12 +1253,17 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, e = b->bm_bits ? b->bm_bits -1 : 0; } for (bitnr = s; bitnr <= e; bitnr++) { - unsigned long offset = bitnr>>LN2_BPL; - unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); + unsigned int page_nr = bm_bit_to_page_idx(b, bitnr); if (page_nr != last_page_nr) { if (p_addr) __bm_unmap(p_addr, km); - p_addr = __bm_map_paddr(b, offset, km); + if (c < 0) + bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); + else if (c > 0) + bm_set_page_need_writeout(b->bm_pages[last_page_nr]); + changed_total += c; + c = 0; + p_addr = __bm_map_pidx(b, page_nr, km); last_page_nr = page_nr; } if (val) @@ -1072,8 +1273,13 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, } if (p_addr) __bm_unmap(p_addr, km); - b->bm_set += c; - return c; + if (c < 0) + bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); + else if (c > 0) + bm_set_page_need_writeout(b->bm_pages[last_page_nr]); + changed_total += c; + b->bm_set += changed_total; + return changed_total; } /* returns number of bits actually changed. @@ -1211,8 +1417,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) if (bm_is_locked(b)) bm_print_lock_info(mdev); if (bitnr < b->bm_bits) { - unsigned long offset = bitnr>>LN2_BPL; - p_addr = bm_map_paddr(b, offset); + p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr)); i = generic_test_le_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0; bm_unmap(p_addr); } else if (bitnr == b->bm_bits) { @@ -1231,10 +1436,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi { unsigned long flags; struct drbd_bitmap *b = mdev->bitmap; - unsigned long *p_addr = NULL, page_nr = -1; + unsigned long *p_addr = NULL; unsigned long bitnr; + unsigned int page_nr = -1U; int c = 0; - size_t w; /* If this is called without a bitmap, that is a bug. But just to be * robust in case we screwed up elsewhere, in that case pretend there @@ -1247,12 +1452,12 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi if (bm_is_locked(b)) bm_print_lock_info(mdev); for (bitnr = s; bitnr <= e; bitnr++) { - w = bitnr >> LN2_BPL; - if (page_nr != w >> (PAGE_SHIFT - LN2_BPL + 3)) { - page_nr = w >> (PAGE_SHIFT - LN2_BPL + 3); + unsigned int idx = bm_bit_to_page_idx(b, bitnr); + if (page_nr != idx) { + page_nr = idx; if (p_addr) bm_unmap(p_addr); - p_addr = bm_map_paddr(b, w); + p_addr = bm_map_pidx(b, idx); } ERR_IF (bitnr >= b->bm_bits) { dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); @@ -1300,7 +1505,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) count = 0; if (s < b->bm_words) { int n = e-s; - p_addr = bm_map_paddr(b, s); + p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); bm = p_addr + MLPP(s); while (n--) count += hweight_long(*bm++); @@ -1335,7 +1540,7 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) count = 0; if (s < b->bm_words) { i = do_now = e-s; - p_addr = bm_map_paddr(b, s); + p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); bm = p_addr + MLPP(s); while (i--) { count += hweight_long(*bm); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f6da48bb8c7..74cc50a2182 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -833,7 +833,7 @@ enum { CRASHED_PRIMARY, /* This node was a crashed primary. * Gets cleared when the state.conn * goes into C_CONNECTED state. */ - WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */ + NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */ CONSIDER_RESYNC, MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ @@ -1428,7 +1428,7 @@ extern void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e); extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); -extern int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local); +extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, @@ -1446,7 +1446,7 @@ extern int drbd_bm_rs_done(struct drbd_conf *mdev); /* for receive_bitmap */ extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, unsigned long *buffer); -/* for _drbd_send_bitmap and drbd_bm_write_sect */ +/* for _drbd_send_bitmap */ extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, unsigned long *buffer); @@ -1641,7 +1641,6 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, #define drbd_set_out_of_sync(mdev, sector, size) \ __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); -extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev); extern void drbd_al_shrink(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 67fffad213e..57ed7181742 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1289,6 +1289,26 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv) } } +int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why) +{ + int rv; + + D_ASSERT(current == mdev->worker.task); + + /* open coded non-blocking drbd_suspend_io(mdev); */ + set_bit(SUSPEND_IO, &mdev->flags); + if (!is_susp(mdev->state)) + D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); + + drbd_bm_lock(mdev, why); + rv = io_fn(mdev); + drbd_bm_unlock(mdev); + + drbd_resume_io(mdev); + + return rv; +} + /** * after_state_ch() - Perform after state change actions that may sleep * @mdev: DRBD device. @@ -1404,7 +1424,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* D_DISKLESS Peer becomes secondary */ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) - drbd_al_to_on_disk_bm(mdev); + drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, "demote diskless peer"); + put_ldev(mdev); + } + + if (os.role == R_PRIMARY && ns.role == R_SECONDARY && get_ldev(mdev)) { + drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, "demote"); put_ldev(mdev); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 2f072498214..77dc022eaf6 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -407,10 +407,8 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) } } - if ((new_role == R_SECONDARY) && get_ldev(mdev)) { - drbd_al_to_on_disk_bm(mdev); - put_ldev(mdev); - } + /* writeout of activity log covered areas of the bitmap + * to stable storage done in after state change already */ if (mdev->state.conn >= C_WF_REPORT_PARAMS) { /* if this was forced, we should consider sync */ @@ -1174,7 +1172,10 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (cp_discovered) { drbd_al_apply_to_bm(mdev); - drbd_al_to_on_disk_bm(mdev); + if (drbd_bitmap_io(mdev, &drbd_bm_write, "crashed primary apply AL")) { + retcode = ERR_IO_MD_DISK; + goto force_diskless_dec; + } } if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 9fe3e890da0..d17f2ed777c 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -907,10 +907,8 @@ out: drbd_md_sync(mdev); - if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) { - dev_info(DEV, "Writing the whole bitmap\n"); - drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished"); - } + dev_info(DEV, "Writing changed bitmap pages\n"); + drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished"); if (khelper_cmd) drbd_khelper(mdev, khelper_cmd); @@ -1127,7 +1125,6 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) mdev->ov_last_oos_size = size>>9; } drbd_set_out_of_sync(mdev, sector, size); - set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); } int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) -- cgit v1.2.3-70-g09d2 From 4b0715f09655e76ca24c35a9e25e7c464c2f7346 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 14 Dec 2010 15:13:04 +0100 Subject: drbd: allow petabyte storage on 64bit arch Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 170 ++++++++++++++++++++++++--------------- drivers/block/drbd/drbd_int.h | 41 ++++++---- drivers/block/drbd/drbd_nl.c | 8 +- drivers/block/drbd/drbd_proc.c | 6 +- drivers/block/drbd/drbd_worker.c | 2 +- 5 files changed, 142 insertions(+), 85 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 72cd41a96ef..0e31e573af7 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -37,10 +37,46 @@ * convention: * function name drbd_bm_... => used elsewhere, "public". * function name bm_... => internal to implementation, "private". + */ + - * Note that since find_first_bit returns int, at the current granularity of - * the bitmap (4KB per byte), this implementation "only" supports up to - * 1<<(32+12) == 16 TB... +/* + * LIMITATIONS: + * We want to support >= peta byte of backend storage, while for now still using + * a granularity of one bit per 4KiB of storage. + * 1 << 50 bytes backend storage (1 PiB) + * 1 << (50 - 12) bits needed + * 38 --> we need u64 to index and count bits + * 1 << (38 - 3) bitmap bytes needed + * 35 --> we still need u64 to index and count bytes + * (that's 32 GiB of bitmap for 1 PiB storage) + * 1 << (35 - 2) 32bit longs needed + * 33 --> we'd even need u64 to index and count 32bit long words. + * 1 << (35 - 3) 64bit longs needed + * 32 --> we could get away with a 32bit unsigned int to index and count + * 64bit long words, but I rather stay with unsigned long for now. + * We probably should neither count nor point to bytes or long words + * directly, but either by bitnumber, or by page index and offset. + * 1 << (35 - 12) + * 22 --> we need that much 4KiB pages of bitmap. + * 1 << (22 + 3) --> on a 64bit arch, + * we need 32 MiB to store the array of page pointers. + * + * Because I'm lazy, and because the resulting patch was too large, too ugly + * and still incomplete, on 32bit we still "only" support 16 TiB (minus some), + * (1 << 32) bits * 4k storage. + * + + * bitmap storage and IO: + * Bitmap is stored little endian on disk, and is kept little endian in + * core memory. Currently we still hold the full bitmap in core as long + * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage + * seems excessive. + * + * We plan to reduce the amount of in-core bitmap pages by pageing them in + * and out against their on-disk location as necessary, but need to make + * sure we don't cause too much meta data IO, and must not deadlock in + * tight memory situations. This needs some more work. */ /* @@ -56,13 +92,9 @@ struct drbd_bitmap { struct page **bm_pages; spinlock_t bm_lock; - /* WARNING unsigned long bm_*: - * 32bit number of bit offset is just enough for 512 MB bitmap. - * it will blow up if we make the bitmap bigger... - * not that it makes much sense to have a bitmap that large, - * rather change the granularity to 16k or 64k or something. - * (that implies other problems, however...) - */ + + /* see LIMITATIONS: above */ + unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */ unsigned long bm_bits; size_t bm_words; @@ -517,43 +549,39 @@ static void bm_set_surplus(struct drbd_bitmap *b) bm_unmap(p_addr); } +/* you better not modify the bitmap while this is running, + * or its results will be stale */ static unsigned long bm_count_bits(struct drbd_bitmap *b) { - unsigned long *p_addr, *bm, offset = 0; + unsigned long *p_addr; unsigned long bits = 0; - unsigned long i, do_now; - unsigned long words; - - /* due to 64bit alignment, the last long on a 32bit arch - * may be not used at all. The last used long will likely - * be only partially used, always. Don't count those bits, - * but mask them out. */ - words = (b->bm_bits + BITS_PER_LONG - 1) >> LN2_BPL; - - while (offset < words) { - i = do_now = min_t(size_t, words-offset, LWPP); - p_addr = __bm_map_pidx(b, bm_word_to_page_idx(b, offset), KM_USER0); - bm = p_addr + MLPP(offset); - while (i--) { - bits += hweight_long(*bm++); - } - offset += do_now; - if (offset == words) { - /* last word may only be partially used, - * see also bm_clear_surplus. */ - i = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) -1; - if (i) { - bits -= hweight_long(p_addr[do_now-1] & ~i); - p_addr[do_now-1] &= i; - } - /* 32bit arch, may have an unused padding long */ - if (words != b->bm_words) - p_addr[do_now] = 0; - } + unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1; + int idx, last_page, i, last_word; + + /* because of the "extra long to catch oob access" we allocate in + * drbd_bm_resize, bm_number_of_pages -1 is not necessarily the page + * containing the last _relevant_ bitmap word */ + last_page = bm_bit_to_page_idx(b, b->bm_bits-1); + + /* all but last page */ + for (idx = 0; idx < last_page; idx++) { + p_addr = __bm_map_pidx(b, idx, KM_USER0); + for (i = 0; i < LWPP; i++) + bits += hweight_long(p_addr[i]); __bm_unmap(p_addr, KM_USER0); cond_resched(); } - + /* last (or only) page */ + last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL; + p_addr = __bm_map_pidx(b, idx, KM_USER0); + for (i = 0; i < last_word; i++) + bits += hweight_long(p_addr[i]); + p_addr[last_word] &= cpu_to_lel(mask); + bits += hweight_long(p_addr[last_word]); + /* 32bit arch, may have an unused padding long */ + if (BITS_PER_LONG == 32 && (last_word & 1) == 0) + p_addr[last_word+1] = 0; + __bm_unmap(p_addr, KM_USER0); return bits; } @@ -564,8 +592,6 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) unsigned int idx; size_t do_now, end; -#define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512) - end = offset + len; if (end > b->bm_words) { @@ -645,8 +671,14 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) words = ALIGN(bits, 64) >> LN2_BPL; if (get_ldev(mdev)) { - D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12)); + u64 bits_on_disk = ((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12; put_ldev(mdev); + if (bits > bits_on_disk) { + dev_info(DEV, "bits = %lu\n", bits); + dev_info(DEV, "bits_on_disk = %llu\n", bits_on_disk); + err = -ENOSPC; + goto out; + } } /* one extra long to catch off by one errors */ @@ -1113,9 +1145,12 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l * @mdev: DRBD device. * @idx: bitmap page index * - * We don't want to special case on logical_block_size of the underlaying - * device, so we submit PAGE_SIZE aligned pieces containing the requested enr. + * We don't want to special case on logical_block_size of the backend device, + * so we submit PAGE_SIZE aligned pieces. * Note that on "most" systems, PAGE_SIZE is 4k. + * + * In case this becomes an issue on systems with larger PAGE_SIZE, + * we may want to change this again to write 4k aligned 4k pieces. */ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) { @@ -1144,52 +1179,57 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc /* NOTE * find_first_bit returns int, we return unsigned long. - * should not make much difference anyways, but ... + * For this to work on 32bit arch with bitnumbers > (1<<32), + * we'd need to return u64, and get a whole lot of other places + * fixed where we still use unsigned long. * * this returns a bit number, NOT a sector! */ -#define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1) static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, const int find_zero_bit, const enum km_type km) { struct drbd_bitmap *b = mdev->bitmap; - unsigned long i = -1UL; unsigned long *p_addr; - unsigned long bit_offset; /* bit offset of the mapped page. */ + unsigned long bit_offset; + unsigned i; + if (bm_fo > b->bm_bits) { dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); + bm_fo = DRBD_END_OF_BITMAP; } else { while (bm_fo < b->bm_bits) { /* bit offset of the first bit in the page */ - bit_offset = bm_fo & ~BPP_MASK; + bit_offset = bm_fo & ~BITS_PER_PAGE_MASK; p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km); if (find_zero_bit) - i = generic_find_next_zero_le_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); + i = generic_find_next_zero_le_bit(p_addr, + PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); else - i = generic_find_next_le_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); + i = generic_find_next_le_bit(p_addr, + PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); __bm_unmap(p_addr, km); if (i < PAGE_SIZE*8) { - i = bit_offset + i; - if (i >= b->bm_bits) + bm_fo = bit_offset + i; + if (bm_fo >= b->bm_bits) break; goto found; } bm_fo = bit_offset + PAGE_SIZE*8; } - i = -1UL; + bm_fo = DRBD_END_OF_BITMAP; } found: - return i; + return bm_fo; } static unsigned long bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, const int find_zero_bit) { struct drbd_bitmap *b = mdev->bitmap; - unsigned long i = -1UL; + unsigned long i = DRBD_END_OF_BITMAP; ERR_IF(!b) return i; ERR_IF(!b->bm_pages) return i; @@ -1267,9 +1307,9 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, last_page_nr = page_nr; } if (val) - c += (0 == generic___test_and_set_le_bit(bitnr & BPP_MASK, p_addr)); + c += (0 == generic___test_and_set_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr)); else - c -= (0 != generic___test_and_clear_le_bit(bitnr & BPP_MASK, p_addr)); + c -= (0 != generic___test_and_clear_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr)); } if (p_addr) __bm_unmap(p_addr, km); @@ -1418,7 +1458,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) bm_print_lock_info(mdev); if (bitnr < b->bm_bits) { p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr)); - i = generic_test_le_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0; + i = generic_test_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0; bm_unmap(p_addr); } else if (bitnr == b->bm_bits) { i = -1; @@ -1517,13 +1557,15 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) return count; } -/* set all bits covered by the AL-extent al_enr */ +/* Set all bits covered by the AL-extent al_enr. + * Returns number of bits changed. */ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) { struct drbd_bitmap *b = mdev->bitmap; unsigned long *p_addr, *bm; unsigned long weight; - int count, s, e, i, do_now; + unsigned long s, e; + int count, i, do_now; ERR_IF(!b) return 0; ERR_IF(!b->bm_pages) return 0; @@ -1552,7 +1594,7 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) if (e == b->bm_words) b->bm_set -= bm_clear_surplus(b); } else { - dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s); + dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s); } weight = b->bm_set - weight; spin_unlock_irq(&b->bm_lock); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 74cc50a2182..5a2d0ec72b3 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1003,9 +1003,9 @@ struct drbd_conf { struct hlist_head *tl_hash; unsigned int tl_hash_s; - /* blocks to sync in this run [unit BM_BLOCK_SIZE] */ + /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ unsigned long rs_total; - /* number of sync IOs that failed in this run */ + /* number of resync blocks that failed in this run */ unsigned long rs_failed; /* Syncer's start time [unit jiffies] */ unsigned long rs_start; @@ -1399,7 +1399,9 @@ struct bm_extent { * you should use 64bit OS for that much storage, anyways. */ #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff) #else -#define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0x1LU << 32) +/* we allow up to 1 PiB now on 64bit architecture with "flexible" meta data */ +#define DRBD_MAX_SECTORS_FLEX (1UL << 51) +/* corresponds to (1UL << 38) bits right now. */ #endif #endif @@ -1419,11 +1421,15 @@ extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new extern void drbd_bm_cleanup(struct drbd_conf *mdev); extern void drbd_bm_set_all(struct drbd_conf *mdev); extern void drbd_bm_clear_all(struct drbd_conf *mdev); +/* set/clear/test only a few bits at a time */ extern int drbd_bm_set_bits( struct drbd_conf *mdev, unsigned long s, unsigned long e); extern int drbd_bm_clear_bits( struct drbd_conf *mdev, unsigned long s, unsigned long e); -/* bm_set_bits variant for use while holding drbd_bm_lock */ +extern int drbd_bm_count_bits( + struct drbd_conf *mdev, const unsigned long s, const unsigned long e); +/* bm_set_bits variant for use while holding drbd_bm_lock, + * may process the whole bitmap in one go */ extern void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e); extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); @@ -1436,6 +1442,8 @@ extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, extern size_t drbd_bm_words(struct drbd_conf *mdev); extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); + +#define DRBD_END_OF_BITMAP (~(unsigned long)0) extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); /* bm_find_next variants for use while you hold drbd_bm_lock() */ extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); @@ -1452,8 +1460,6 @@ extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, extern void drbd_bm_lock(struct drbd_conf *mdev, char *why); extern void drbd_bm_unlock(struct drbd_conf *mdev); - -extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e); /* drbd_main.c */ extern struct kmem_cache *drbd_request_cache; @@ -2158,10 +2164,8 @@ extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, unsigned long *bits_left, unsigned int *per_mil_done) { - /* - * this is to break it at compile time when we change that - * (we may feel 4TB maximum storage per drbd is not enough) - */ + /* this is to break it at compile time when we change that, in case we + * want to support more than (1<<32) bits on a 32bit arch. */ typecheck(unsigned long, mdev->rs_total); /* note: both rs_total and rs_left are in bits, i.e. in @@ -2186,10 +2190,19 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, *bits_left, mdev->rs_total, mdev->rs_failed); *per_mil_done = 0; } else { - /* make sure the calculation happens in long context */ - unsigned long tmp = 1000UL - - (*bits_left >> 10)*1000UL - / ((mdev->rs_total >> 10) + 1UL); + /* Make sure the division happens in long context. + * We allow up to one petabyte storage right now, + * at a granularity of 4k per bit that is 2**38 bits. + * After shift right and multiplication by 1000, + * this should still fit easily into a 32bit long, + * so we don't need a 64bit division on 32bit arch. + * Note: currently we don't support such large bitmaps on 32bit + * arch anyways, but no harm done to be prepared for it here. + */ + unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10; + unsigned long left = *bits_left >> shift; + unsigned long total = 1UL + (mdev->rs_total >> shift); + unsigned long tmp = 1000UL - left * 1000UL/total; *per_mil_done = tmp; } } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 77dc022eaf6..a46bc0287e2 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -527,17 +527,19 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, } } +/* input size is expected to be in KB */ char *ppsize(char *buf, unsigned long long size) { - /* Needs 9 bytes at max. */ + /* Needs 9 bytes at max including trailing NUL: + * -1ULL ==> "16384 EB" */ static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' }; int base = 0; - while (size >= 10000) { + while (size >= 10000 && base < sizeof(units)-1) { /* shift + round */ size = (size >> 10) + !!(size & (1<<9)); base++; } - sprintf(buf, "%lu %cB", (long)size, units[base]); + sprintf(buf, "%u %cB", (unsigned)size, units[base]); return buf; } diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index efba62cd2e5..2959cdfb77f 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -91,9 +91,9 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) seq_printf(seq, "sync'ed:"); seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); - /* if more than 1 GB display in MB */ - if (mdev->rs_total > 0x100000L) - seq_printf(seq, "(%lu/%lu)M\n\t", + /* if more than a few GB, display in MB */ + if (mdev->rs_total > (4UL << (30 - BM_BLOCK_SHIFT))) + seq_printf(seq, "(%lu/%lu)M", (unsigned long) Bit2KB(rs_left >> 10), (unsigned long) Bit2KB(mdev->rs_total >> 10)); else diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d17f2ed777c..be46084c254 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -577,7 +577,7 @@ next_sector: size = BM_BLOCK_SIZE; bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); - if (bit == -1UL) { + if (bit == DRBD_END_OF_BITMAP) { mdev->bm_resync_fo = drbd_bm_bits(mdev); mdev->resync_work.cb = w_resync_inactive; put_ldev(mdev); -- cgit v1.2.3-70-g09d2 From 20ee639024e3d33111df0e343050b218c656bf16 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 18 Jan 2011 15:28:59 +0100 Subject: drbd: cleaned up __set_current_state() followed by schedule_timeout() calls Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 3 +-- drivers/block/drbd/drbd_receiver.c | 9 +++------ drivers/block/drbd/drbd_worker.c | 3 +-- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a46bc0287e2..42e16e4edfa 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -360,8 +360,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) if (rv == SS_TWO_PRIMARIES) { /* Maybe the peer is detected as dead very soon... retry at most once more in this case. */ - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout((mdev->net_conf->ping_timeo+1)*HZ/10); + schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10); if (try < max_tries) try = max_tries - 1; continue; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a7f5b6d134e..3ccc6c33a33 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -768,8 +768,7 @@ static int drbd_connect(struct drbd_conf *mdev) if (s || ++try >= 3) break; /* give the other side time to call bind() & listen() */ - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); + schedule_timeout_interruptible(HZ / 10); } if (s) { @@ -788,8 +787,7 @@ static int drbd_connect(struct drbd_conf *mdev) } if (sock && msock) { - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); + schedule_timeout_interruptible(HZ / 10); ok = drbd_socket_okay(mdev, &sock); ok = drbd_socket_okay(mdev, &msock) && ok; if (ok) @@ -4142,8 +4140,7 @@ int drbdd_init(struct drbd_thread *thi) h = drbd_connect(mdev); if (h == 0) { drbd_disconnect(mdev); - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); + schedule_timeout_interruptible(HZ); } if (h == -1) { dev_warn(DEV, "Discarding network configuration.\n"); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 3a95b701b5d..7b3d4dc4af8 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -777,8 +777,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) * queue (or even the read operations for those packets * is not finished by now). Retry in 100ms. */ - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); + schedule_timeout_interruptible(HZ / 10); w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); if (w) { w->cb = w_resync_finished; -- cgit v1.2.3-70-g09d2 From 194bfb32dba8345a7e0f83e9b1ee965e14d4b679 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 18 Jan 2011 10:38:01 +0100 Subject: drbd: serialize admin requests for new resync with pending bitmap io Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 42e16e4edfa..434b621f76a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1873,6 +1873,10 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl { int retcode; + /* If there is still bitmap IO pending, probably because of a previous + * resync just being finished, wait for it before requesting a new resync. */ + wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION) @@ -1908,6 +1912,10 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re { int retcode; + /* If there is still bitmap IO pending, probably because of a previous + * resync just being finished, wait for it before requesting a new resync. */ + wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); if (retcode < SS_SUCCESS) { @@ -1916,7 +1924,6 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re into a full resync. */ retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); if (retcode >= SS_SUCCESS) { - /* open coded drbd_bitmap_io() */ if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, "set_n_write from invalidate_peer")) retcode = ERR_IO_MD_DISK; -- cgit v1.2.3-70-g09d2 From cd88d030d41a9b0100fd5fee872024e6ebc8b276 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 20 Jan 2011 11:46:41 +0100 Subject: drbd: Provide hints with the error message when clearing the sync pause flag When the user clears the sync-pause flag, and sync stays in pause state, give hints to the user, why it still is in pause state. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 12 ++++++++++-- include/linux/drbd.h | 2 ++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 434b621f76a..ffe3a97fef9 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1952,9 +1952,17 @@ static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n struct drbd_nl_cfg_reply *reply) { int retcode = NO_ERROR; + union drbd_state s; - if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) - retcode = ERR_PAUSE_IS_CLEAR; + if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { + s = mdev->state; + if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) { + retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP : + s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR; + } else { + retcode = ERR_PAUSE_IS_CLEAR; + } + } reply->ret_code = retcode; return 0; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index d10431fab00..ba5c785d3f7 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -153,6 +153,8 @@ enum drbd_ret_code { ERR_NEED_APV_93 = 153, ERR_STONITH_AND_PROT_A = 154, ERR_CONG_NOT_PROTO_A = 155, + ERR_PIC_AFTER_DEP = 156, + ERR_PIC_PEER_DEP = 157, /* insert new ones above this line */ AFTER_LAST_ERR_CODE -- cgit v1.2.3-70-g09d2 From 62b0da3a244ac33d25a77861ef1cc0080103f2ff Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 20 Jan 2011 13:25:21 +0100 Subject: drbd: log UUIDs whenever they change All decisions about sync, sync direction, and wether or not to allow a connect or attach are based on our set of UUIDs to tag a data generation. Log changes to the UUIDs whenever they occur, logging "new current UUID P:Q:R:S" is more useful than "Creating new current UUID". Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 ++-- drivers/block/drbd/drbd_main.c | 71 ++++++++++++++++++-------------------- drivers/block/drbd/drbd_nl.c | 1 + drivers/block/drbd/drbd_receiver.c | 11 ++++-- drivers/block/drbd/drbd_worker.c | 20 ++++++----- 5 files changed, 58 insertions(+), 51 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index cfe7fff459e..0a9059eb94d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1240,11 +1240,11 @@ extern int _drbd_send_bitmap(struct drbd_conf *mdev); extern int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode); extern void drbd_free_bc(struct drbd_backing_dev *ldev); extern void drbd_mdev_cleanup(struct drbd_conf *mdev); +void drbd_print_uuids(struct drbd_conf *mdev, const char *text); /* drbd_meta-data.c (still in drbd_main.c) */ extern void drbd_md_sync(struct drbd_conf *mdev); extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); -/* maybe define them below as inline? */ extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); @@ -2360,9 +2360,11 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) } } -static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) +static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) { + int changed = mdev->ed_uuid != val; mdev->ed_uuid = val; + return changed; } static inline int seq_cmp(u32 a, u32 b) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e0be4077d56..b68332a0e73 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1159,6 +1159,10 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, atomic_inc(&mdev->local_cnt); mdev->state = ns; + + if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) + drbd_print_uuids(mdev, "attached to UUIDs"); + wake_up(&mdev->misc_wait); wake_up(&mdev->state_wait); @@ -2035,6 +2039,24 @@ int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev) return _drbd_send_uuids(mdev, 8); } +void drbd_print_uuids(struct drbd_conf *mdev, const char *text) +{ + if (get_ldev_if_state(mdev, D_NEGOTIATING)) { + u64 *uuid = mdev->ldev->md.uuid; + dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n", + text, + (unsigned long long)uuid[UI_CURRENT], + (unsigned long long)uuid[UI_BITMAP], + (unsigned long long)uuid[UI_HISTORY_START], + (unsigned long long)uuid[UI_HISTORY_END]); + put_ldev(mdev); + } else { + dev_info(DEV, "%s effective data uuid: %016llX\n", + text, + (unsigned long long)mdev->ed_uuid); + } +} + int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) { struct p_rs_uuid p; @@ -2044,6 +2066,7 @@ int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET; drbd_uuid_set(mdev, UI_BITMAP, uuid); + drbd_print_uuids(mdev, "updated sync UUID"); drbd_md_sync(mdev); p.uuid = cpu_to_be64(uuid); @@ -3749,28 +3772,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) return rv; } -static void debug_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index) -{ - static char *uuid_str[UI_EXTENDED_SIZE] = { - [UI_CURRENT] = "CURRENT", - [UI_BITMAP] = "BITMAP", - [UI_HISTORY_START] = "HISTORY_START", - [UI_HISTORY_END] = "HISTORY_END", - [UI_SIZE] = "SIZE", - [UI_FLAGS] = "FLAGS", - }; - - if (index >= UI_EXTENDED_SIZE) { - dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n"); - return; - } - - dynamic_dev_dbg(DEV, " uuid[%s] now %016llX\n", - uuid_str[index], - (unsigned long long)mdev->ldev->md.uuid[index]); -} - - /** * drbd_md_mark_dirty() - Mark meta data super block as dirty * @mdev: DRBD device. @@ -3800,10 +3801,8 @@ static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) { int i; - for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) { + for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; - debug_drbd_uuid(mdev, i+1); - } } void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) @@ -3818,7 +3817,6 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) } mdev->ldev->md.uuid[idx] = val; - debug_drbd_uuid(mdev, idx); drbd_md_mark_dirty(mdev); } @@ -3828,7 +3826,6 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) if (mdev->ldev->md.uuid[idx]) { drbd_uuid_move_history(mdev); mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; - debug_drbd_uuid(mdev, UI_HISTORY_START); } _drbd_uuid_set(mdev, idx, val); } @@ -3843,14 +3840,16 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) { u64 val; + unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; + + if (bm_uuid) + dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); - dev_info(DEV, "Creating new current UUID\n"); - D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0); mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; - debug_drbd_uuid(mdev, UI_BITMAP); get_random_bytes(&val, sizeof(u64)); _drbd_uuid_set(mdev, UI_CURRENT, val); + drbd_print_uuids(mdev, "new current UUID"); /* get it to stable storage _now_ */ drbd_md_sync(mdev); } @@ -3864,16 +3863,12 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) drbd_uuid_move_history(mdev); mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; mdev->ldev->md.uuid[UI_BITMAP] = 0; - debug_drbd_uuid(mdev, UI_HISTORY_START); - debug_drbd_uuid(mdev, UI_BITMAP); } else { - if (mdev->ldev->md.uuid[UI_BITMAP]) - dev_warn(DEV, "bm UUID already set"); - - mdev->ldev->md.uuid[UI_BITMAP] = val; - mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1); + unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; + if (bm_uuid) + dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); - debug_drbd_uuid(mdev, UI_BITMAP); + mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1); } drbd_md_mark_dirty(mdev); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ffe3a97fef9..ce6f2fe8085 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2151,6 +2151,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl if (skip_initial_sync) { drbd_send_uuids_skip_initial_sync(mdev); _drbd_uuid_set(mdev, UI_BITMAP, 0); + drbd_print_uuids(mdev, "cleared bitmap UUID"); spin_lock_irq(&mdev->req_lock); _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), CS_VERBOSE, NULL); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e9354931eac..e5686a81f42 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3024,7 +3024,7 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned { struct p_uuids *p = &mdev->data.rbuf.uuids; u64 *p_uuid; - int i; + int i, updated_uuids = 0; p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); @@ -3059,13 +3059,14 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), CS_VERBOSE, NULL); drbd_md_sync(mdev); + updated_uuids = 1; } put_ldev(mdev); } else if (mdev->state.disk < D_INCONSISTENT && mdev->state.role == R_PRIMARY) { /* I am a diskless primary, the peer just created a new current UUID for me. */ - drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); + updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); } /* Before we test for the disk state, we should wait until an eventually @@ -3074,7 +3075,10 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned new disk state... */ wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags)); if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) - drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); + updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); + + if (updated_uuids) + drbd_print_uuids(mdev, "receiver updated UUIDs to"); return true; } @@ -3305,6 +3309,7 @@ static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsi _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid)); _drbd_uuid_set(mdev, UI_BITMAP, 0UL); + drbd_print_uuids(mdev, "updated sync uuid"); drbd_start_resync(mdev, C_SYNC_TARGET); put_ldev(mdev); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ec42e04bb51..ff0eb308ee4 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -871,14 +871,18 @@ int drbd_resync_finished(struct drbd_conf *mdev) } } - drbd_uuid_set_bm(mdev, 0UL); - - if (mdev->p_uuid) { - /* Now the two UUID sets are equal, update what we - * know of the peer. */ - int i; - for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) - mdev->p_uuid[i] = mdev->ldev->md.uuid[i]; + if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) { + /* for verify runs, we don't update uuids here, + * so there would be nothing to report. */ + drbd_uuid_set_bm(mdev, 0UL); + drbd_print_uuids(mdev, "updated UUIDs"); + if (mdev->p_uuid) { + /* Now the two UUID sets are equal, update what we + * know of the peer. */ + int i; + for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) + mdev->p_uuid[i] = mdev->ldev->md.uuid[i]; + } } } -- cgit v1.2.3-70-g09d2 From 20ceb2b22edaf51e59e76087efdc71a16a2858de Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 21 Jan 2011 10:56:44 +0100 Subject: drbd: describe bitmap locking for bulk operation in finer detail Now that we do no longer in-place endian-swap the bitmap, we allow selected bitmap operations (testing bits, sometimes even settting bits) during some bulk operations. This caused us to hit a lot of FIXME asserts similar to FIXME asender in drbd_bm_count_bits, bitmap locked for 'write from resync_finished' by worker Which now is nonsense: looking at the bitmap is perfectly legal as long as it is not being resized. This cosmetic patch defines some flags to describe expectations in finer detail, so the asserts in e.g. bm_change_bits_to() can be skipped if appropriate. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 48 +++++++++++++------------------ drivers/block/drbd/drbd_int.h | 36 ++++++++++++++++++++--- drivers/block/drbd/drbd_main.c | 58 +++++++++++++++++++++++++------------- drivers/block/drbd/drbd_nl.c | 19 +++++++++---- drivers/block/drbd/drbd_receiver.c | 17 +++++++---- 5 files changed, 115 insertions(+), 63 deletions(-) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 25428bc2847..b62dd5f26c5 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -104,26 +104,16 @@ struct drbd_bitmap { wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */ - unsigned long bm_flags; + enum bm_flag bm_flags; /* debugging aid, in case we are still racy somewhere */ char *bm_why; struct task_struct *bm_task; }; -/* definition of bits in bm_flags */ -#define BM_LOCKED 0 -// #define BM_MD_IO_ERROR 1 unused now. -#define BM_P_VMALLOCED 2 - static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, unsigned long e, int val, const enum km_type km); -static int bm_is_locked(struct drbd_bitmap *b) -{ - return test_bit(BM_LOCKED, &b->bm_flags); -} - #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) { @@ -140,7 +130,7 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) b->bm_task == mdev->worker.task ? "worker" : "?"); } -void drbd_bm_lock(struct drbd_conf *mdev, char *why) +void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) { struct drbd_bitmap *b = mdev->bitmap; int trylock_failed; @@ -163,8 +153,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why) b->bm_task == mdev->worker.task ? "worker" : "?"); mutex_lock(&b->bm_change); } - if (__test_and_set_bit(BM_LOCKED, &b->bm_flags)) + if (BM_LOCKED_MASK & b->bm_flags) dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); + b->bm_flags |= flags & BM_LOCKED_MASK; b->bm_why = why; b->bm_task = current; @@ -178,9 +169,10 @@ void drbd_bm_unlock(struct drbd_conf *mdev) return; } - if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags)) + if (!(BM_LOCKED_MASK & mdev->bitmap->bm_flags)) dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n"); + b->bm_flags &= ~BM_LOCKED_MASK; b->bm_why = NULL; b->bm_task = NULL; mutex_unlock(&b->bm_change); @@ -421,9 +413,9 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) } if (vmalloced) - set_bit(BM_P_VMALLOCED, &b->bm_flags); + b->bm_flags |= BM_P_VMALLOCED; else - clear_bit(BM_P_VMALLOCED, &b->bm_flags); + b->bm_flags &= ~BM_P_VMALLOCED; return new_pages; } @@ -460,7 +452,7 @@ void drbd_bm_cleanup(struct drbd_conf *mdev) { ERR_IF (!mdev->bitmap) return; bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); - bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags)); + bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags)); kfree(mdev->bitmap); mdev->bitmap = NULL; } @@ -623,7 +615,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) ERR_IF(!b) return -ENOMEM; - drbd_bm_lock(mdev, "resize"); + drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK); dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n", (unsigned long long)capacity); @@ -631,7 +623,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) if (capacity == b->bm_dev_capacity) goto out; - opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags); + opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags); if (capacity == 0) { spin_lock_irq(&b->bm_lock); @@ -1030,7 +1022,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id * as we submit copies of pages anyways. */ if (!ctx.flags) - WARN_ON(!bm_is_locked(b)); + WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); num_pages = b->bm_number_of_pages; @@ -1220,7 +1212,7 @@ static unsigned long bm_find_next(struct drbd_conf *mdev, ERR_IF(!b->bm_pages) return i; spin_lock_irq(&b->bm_lock); - if (bm_is_locked(b)) + if (BM_DONT_TEST & b->bm_flags) bm_print_lock_info(mdev); i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); @@ -1246,13 +1238,13 @@ unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo * you must take drbd_bm_lock() first */ unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) { - /* WARN_ON(!bm_is_locked(mdev)); */ + /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ return __bm_find_next(mdev, bm_fo, 0, KM_USER1); } unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) { - /* WARN_ON(!bm_is_locked(mdev)); */ + /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ return __bm_find_next(mdev, bm_fo, 1, KM_USER1); } @@ -1322,7 +1314,7 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, ERR_IF(!b->bm_pages) return 0; spin_lock_irqsave(&b->bm_lock, flags); - if (bm_is_locked(b)) + if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags) bm_print_lock_info(mdev); c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1); @@ -1439,7 +1431,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) ERR_IF(!b->bm_pages) return 0; spin_lock_irqsave(&b->bm_lock, flags); - if (bm_is_locked(b)) + if (BM_DONT_TEST & b->bm_flags) bm_print_lock_info(mdev); if (bitnr < b->bm_bits) { p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr)); @@ -1474,7 +1466,7 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi ERR_IF(!b->bm_pages) return 1; spin_lock_irqsave(&b->bm_lock, flags); - if (bm_is_locked(b)) + if (BM_DONT_TEST & b->bm_flags) bm_print_lock_info(mdev); for (bitnr = s; bitnr <= e; bitnr++) { unsigned int idx = bm_bit_to_page_idx(b, bitnr); @@ -1522,7 +1514,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) ERR_IF(!b->bm_pages) return 0; spin_lock_irqsave(&b->bm_lock, flags); - if (bm_is_locked(b)) + if (BM_DONT_TEST & b->bm_flags) bm_print_lock_info(mdev); s = S2W(enr); @@ -1555,7 +1547,7 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) ERR_IF(!b->bm_pages) return 0; spin_lock_irq(&b->bm_lock); - if (bm_is_locked(b)) + if (BM_DONT_SET & b->bm_flags) bm_print_lock_info(mdev); weight = b->bm_set; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0a9059eb94d..267d9897ca8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -855,6 +855,32 @@ enum { struct drbd_bitmap; /* opaque for drbd_conf */ +/* definition of bits in bm_flags to be used in drbd_bm_lock + * and drbd_bitmap_io and friends. */ +enum bm_flag { + /* do we need to kfree, or vfree bm_pages? */ + BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */ + + /* currently locked for bulk operation */ + BM_LOCKED_MASK = 0x7, + + /* in detail, that is: */ + BM_DONT_CLEAR = 0x1, + BM_DONT_SET = 0x2, + BM_DONT_TEST = 0x4, + + /* (test bit, count bit) allowed (common case) */ + BM_LOCKED_TEST_ALLOWED = 0x3, + + /* testing bits, as well as setting new bits allowed, but clearing bits + * would be unexpected. Used during bitmap receive. Setting new bits + * requires sending of "out-of-sync" information, though. */ + BM_LOCKED_SET_ALLOWED = 0x1, + + /* clear is not expected while bitmap is locked for bulk operation */ +}; + + /* TODO sort members for performance * MAYBE group them further */ @@ -920,6 +946,7 @@ struct drbd_md_io { struct bm_io_work { struct drbd_work w; char *why; + enum bm_flag flags; int (*io_fn)(struct drbd_conf *mdev); void (*done)(struct drbd_conf *mdev, int rv); }; @@ -1242,7 +1269,6 @@ extern void drbd_free_bc(struct drbd_backing_dev *ldev); extern void drbd_mdev_cleanup(struct drbd_conf *mdev); void drbd_print_uuids(struct drbd_conf *mdev, const char *text); -/* drbd_meta-data.c (still in drbd_main.c) */ extern void drbd_md_sync(struct drbd_conf *mdev); extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); @@ -1263,10 +1289,12 @@ extern void drbd_md_mark_dirty_(struct drbd_conf *mdev, extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), void (*done)(struct drbd_conf *, int), - char *why); + char *why, enum bm_flag flags); +extern int drbd_bitmap_io(struct drbd_conf *mdev, + int (*io_fn)(struct drbd_conf *), + char *why, enum bm_flag flags); extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); -extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); extern void drbd_go_diskless(struct drbd_conf *mdev); extern void drbd_ldev_destroy(struct drbd_conf *mdev); @@ -1452,7 +1480,7 @@ extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, unsigned long *buffer); -extern void drbd_bm_lock(struct drbd_conf *mdev, char *why); +extern void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags); extern void drbd_bm_unlock(struct drbd_conf *mdev); /* drbd_main.c */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b68332a0e73..a9e9b496e73 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1320,7 +1320,9 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv) } } -int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why) +int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, + int (*io_fn)(struct drbd_conf *), + char *why, enum bm_flag flags) { int rv; @@ -1328,10 +1330,8 @@ int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, int (*io_fn)(struct drbd_ /* open coded non-blocking drbd_suspend_io(mdev); */ set_bit(SUSPEND_IO, &mdev->flags); - if (!is_susp(mdev->state)) - D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); - drbd_bm_lock(mdev, why); + drbd_bm_lock(mdev, why, flags); rv = io_fn(mdev); drbd_bm_unlock(mdev); @@ -1438,7 +1438,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S && mdev->state.conn == C_WF_BITMAP_S) drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, - "send_bitmap (WFBitMapS)"); + "send_bitmap (WFBitMapS)", + BM_LOCKED_TEST_ALLOWED); /* Lost contact to peer's copy of the data */ if ((os.pdsk >= D_INCONSISTENT && @@ -1469,7 +1470,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* D_DISKLESS Peer becomes secondary */ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) - drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, "demote diskless peer"); + /* We may still be Primary ourselves. + * No harm done if the bitmap still changes, + * redirtied pages will follow later. */ + drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, + "demote diskless peer", BM_LOCKED_SET_ALLOWED); put_ldev(mdev); } @@ -1478,7 +1483,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, * if there is a resync going on still */ if (os.role == R_PRIMARY && ns.role == R_SECONDARY && mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { - drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, "demote"); + /* No changes to the bitmap expected this time, so assert that, + * even though no harm was done if it did change. */ + drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, + "demote", BM_LOCKED_TEST_ALLOWED); put_ldev(mdev); } @@ -1512,12 +1520,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* We are in the progress to start a full sync... */ if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) - drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync"); + /* no other bitmap changes expected during this phase */ + drbd_queue_bitmap_io(mdev, + &drbd_bmio_set_n_write, &abw_start_sync, + "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); /* We are invalidating our self... */ if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) - drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); + /* other bitmap operation expected during this phase */ + drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, + "set_n_write from invalidate", BM_LOCKED_MASK); /* first half of local IO error, failure to attach, * or administrative detach */ @@ -1599,14 +1612,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* This triggers bitmap writeout of potentially still unwritten pages * if the resync finished cleanly, or aborted because of peer disk - * failure. Resync aborted because of connection failure does bitmap - * writeout from drbd_disconnect. + * failure, or because of connection loss. * For resync aborted because of local disk failure, we cannot do * any bitmap writeout anymore. + * No harm done if some bits change during this phase. */ - if (os.conn > C_CONNECTED && ns.conn == C_CONNECTED && - mdev->state.conn == C_CONNECTED && get_ldev(mdev)) { - drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished"); + if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { + drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, + "write from resync_finished", BM_LOCKED_SET_ALLOWED); put_ldev(mdev); } @@ -3929,7 +3942,7 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); if (get_ldev(mdev)) { - drbd_bm_lock(mdev, work->why); + drbd_bm_lock(mdev, work->why, work->flags); rv = work->io_fn(mdev); drbd_bm_unlock(mdev); put_ldev(mdev); @@ -3944,6 +3957,7 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) clear_bit(BITMAP_IO_QUEUED, &mdev->flags); work->why = NULL; + work->flags = 0; return 1; } @@ -3998,7 +4012,7 @@ void drbd_go_diskless(struct drbd_conf *mdev) void drbd_queue_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), void (*done)(struct drbd_conf *, int), - char *why) + char *why, enum bm_flag flags) { D_ASSERT(current == mdev->worker.task); @@ -4012,6 +4026,7 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, mdev->bm_io_work.io_fn = io_fn; mdev->bm_io_work.done = done; mdev->bm_io_work.why = why; + mdev->bm_io_work.flags = flags; spin_lock_irq(&mdev->req_lock); set_bit(BITMAP_IO, &mdev->flags); @@ -4031,19 +4046,22 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, * freezes application IO while that the actual IO operations runs. This * functions MAY NOT be called from worker context. */ -int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why) +int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), + char *why, enum bm_flag flags) { int rv; D_ASSERT(current != mdev->worker.task); - drbd_suspend_io(mdev); + if ((flags & BM_LOCKED_SET_ALLOWED) == 0) + drbd_suspend_io(mdev); - drbd_bm_lock(mdev, why); + drbd_bm_lock(mdev, why, flags); rv = io_fn(mdev); drbd_bm_unlock(mdev); - drbd_resume_io(mdev); + if ((flags & BM_LOCKED_SET_ALLOWED) == 0) + drbd_resume_io(mdev); return rv; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ce6f2fe8085..bc0bcb96460 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -648,7 +648,9 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_ dev_info(DEV, "Writing the whole bitmap, %s\n", la_size_changed && md_moved ? "size changed and md moved" : la_size_changed ? "size changed" : "md moved"); - err = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */ + /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ + err = drbd_bitmap_io(mdev, &drbd_bm_write, + "size changed", BM_LOCKED_MASK); if (err) { rv = dev_size_error; goto out; @@ -1160,12 +1162,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { dev_info(DEV, "Assuming that all blocks are out of sync " "(aka FullSync)\n"); - if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) { + if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, + "set_n_write from attaching", BM_LOCKED_MASK)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } } else { - if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) { + if (drbd_bitmap_io(mdev, &drbd_bm_read, + "read from attaching", BM_LOCKED_MASK) < 0) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } @@ -1173,7 +1177,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (cp_discovered) { drbd_al_apply_to_bm(mdev); - if (drbd_bitmap_io(mdev, &drbd_bm_write, "crashed primary apply AL")) { + if (drbd_bitmap_io(mdev, &drbd_bm_write, + "crashed primary apply AL", BM_LOCKED_MASK)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } @@ -1925,7 +1930,8 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); if (retcode >= SS_SUCCESS) { if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, - "set_n_write from invalidate_peer")) + "set_n_write from invalidate_peer", + BM_LOCKED_SET_ALLOWED)) retcode = ERR_IO_MD_DISK; } } else @@ -2143,7 +2149,8 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */ if (args.clear_bm) { - err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid"); + err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, + "clear_n_write from new_c_uuid", BM_LOCKED_MASK); if (err) { dev_err(DEV, "Writing bitmap failed with %d\n",err); retcode = ERR_IO_MD_DISK; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e5686a81f42..e13134f83fa 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2599,7 +2599,8 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol if (abs(hg) >= 2) { dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); - if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) + if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake", + BM_LOCKED_SET_ALLOWED)) return C_MASK; } @@ -3053,7 +3054,8 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (skip_initial_sync) { dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n"); drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, - "clear_n_write from receive_uuids"); + "clear_n_write from receive_uuids", + BM_LOCKED_TEST_ALLOWED); _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]); _drbd_uuid_set(mdev, UI_BITMAP, 0); _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), @@ -3494,7 +3496,9 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne int ok = false; struct p_header80 *h = &mdev->data.rbuf.header.h80; - /* drbd_bm_lock(mdev, "receive bitmap"); By intention no bm_lock */ + drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); + /* you are supposed to send additional out-of-sync information + * if you actually set bits during this phase */ /* maybe we should use some per thread scratch page, * and allocate that during initial device creation? */ @@ -3568,7 +3572,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne ok = true; out: - /* drbd_bm_unlock(mdev); by intention no lock */ + drbd_bm_unlock(mdev); if (ok && mdev->state.conn == C_WF_BITMAP_S) drbd_start_resync(mdev, C_SYNC_SOURCE); free_page((unsigned long) buffer); @@ -3817,7 +3821,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) fp = FP_DONT_CARE; if (get_ldev(mdev)) { - drbd_bitmap_io(mdev, &drbd_bm_write, "write from disconnect"); fp = mdev->ldev->dc.fencing; put_ldev(mdev); } @@ -3846,6 +3849,10 @@ static void drbd_disconnect(struct drbd_conf *mdev) drbd_request_state(mdev, NS(conn, C_STANDALONE)); } + /* serialize with bitmap writeout triggered by the state change, + * if any. */ + wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + /* tcp_close and release of sendpage pages can be deferred. I don't * want to use SO_LINGER, because apparently it can be deferred for * more than 20 seconds (longest time I checked). -- cgit v1.2.3-70-g09d2 From 873b0d5f98ab70e4df7a62b2ef0305373f88f330 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 21 Jan 2011 22:53:48 +0100 Subject: drbd: serialize admin requests for new verify run with pending bitmap io This is an addendum to drbd: serialize admin requests for new resync with pending bitmap io It avoids a race that could trigger "FIXME" assert log messages. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/block/drbd/drbd_nl.c') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index bc0bcb96460..87aaa7fc4d2 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2106,6 +2106,11 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, reply->ret_code = ERR_MANDATORY_TAG; return 0; } + + /* If there is still bitmap IO pending, e.g. previous resync or verify + * just being finished, wait for it before requesting a new resync. */ + wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + /* w_make_ov_request expects position to be aligned */ mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); -- cgit v1.2.3-70-g09d2