From 7ad651b52218eea3f9280dbb353dfe0c42742d85 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:21:03 +0100 Subject: drbd: new on-disk activity log transaction format Use a new on-disk transaction format for the activity log, which allows for multiple changes to the active set per transaction. Using 4k transaction blocks, we can now get rid of the work-around code to deal with devices not supporting 512 byte logical block size. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 447c3675238..75f05af3372 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -102,10 +102,12 @@ #define DRBD_RATE_DEF 250 /* kb/second */ /* less than 7 would hit performance unnecessarily. - * 3833 is the largest prime that still does fit - * into 64 sectors of activity log */ + * 919 slots context information per transaction, + * 32k activity log, 4k transaction size, + * one transaction in flight: + * 919 * 7 = 6433 */ #define DRBD_AL_EXTENTS_MIN 7 -#define DRBD_AL_EXTENTS_MAX 3833 +#define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 #define DRBD_AFTER_MIN -1 -- cgit v1.2.3-70-g09d2 From a5df0e199cf6b31400fa86f6c3f73fa6e127e9ed Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 12:51:43 +0100 Subject: drbd: default to detach on-io-error Old default behaviour was "pass-on", which is not useful in production at all. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/drbd_limits.h') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 75f05af3372..22920a8af4e 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -125,7 +125,7 @@ #define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40)) #define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ -#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON +#define DRBD_ON_IO_ERROR_DEF EP_DETACH #define DRBD_FENCING_DEF FP_DONT_CARE #define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT -- cgit v1.2.3-70-g09d2 From f399002e68e626e7bc443e6fcab1772704cc197f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Mar 2011 14:31:09 +0100 Subject: drbd: distribute former syncer_conf settings to disk, connection, and resource level This commit breaks the API again. Move per-volume former syncer options into disk_conf. Move per-connection former syncer options into net_conf. Renamed the remainign sync_conf to res_opts Syncer settings have been changeable at runtime, so we need to prepare for these settings to be runtime-changeable in their new home as well. Introduce new configuration operations, and share the netlink attribute between "attach" (create new disk) and "disk-opts" (change options). Same for "connect" and "net-opts". Some fields cannot be changed at runtime, however. Introduce a new flag GENLA_F_INVARIANT to be able to trigger on that in the generated validation and assignment functions. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 +- drivers/block/drbd/drbd_main.c | 72 +++-- drivers/block/drbd/drbd_nl.c | 550 ++++++++++++++++++++++++------------- drivers/block/drbd/drbd_receiver.c | 51 ++-- drivers/block/drbd/drbd_state.c | 4 +- drivers/block/drbd/drbd_worker.c | 50 ++-- include/linux/drbd_genl.h | 133 +++++---- include/linux/drbd_limits.h | 2 + include/linux/genl_magic_func.h | 49 ++-- include/linux/genl_magic_struct.h | 18 +- 10 files changed, 572 insertions(+), 367 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d6e7e657e7a..bc265f3733c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -860,7 +860,7 @@ struct drbd_md { s32 bm_offset; /* signed relative sector offset to bitmap */ /* u32 al_nr_extents; important for restoring the AL - * is stored into sync_conf.al_extents, which in turn + * is stored into ldev->dc.al_extents, which in turn * gets applied to act_log->nr_elements */ }; @@ -929,6 +929,7 @@ struct drbd_tconn { /* is a resource from the config file */ atomic_t net_cnt; /* Users of net_conf */ wait_queue_head_t net_cnt_wait; wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ + struct res_opts res_opts; struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ @@ -945,6 +946,8 @@ struct drbd_tconn { /* is a resource from the config file */ struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ + struct crypto_hash *csums_tfm; + struct crypto_hash *verify_tfm; void *int_dig_out; void *int_dig_in; void *int_dig_vv; @@ -963,7 +966,6 @@ struct drbd_conf { unsigned long flags; /* configured by drbdsetup */ - struct syncer_conf sync_conf; struct drbd_backing_dev *ldev __protected_by(local); sector_t p_size; /* partner's disk size */ @@ -1037,8 +1039,6 @@ struct drbd_conf { /* size of out-of-sync range in sectors. */ sector_t ov_last_oos_size; unsigned long ov_left; /* in bits */ - struct crypto_hash *csums_tfm; - struct crypto_hash *verify_tfm; struct drbd_bitmap *bitmap; unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ @@ -1188,7 +1188,7 @@ extern int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, size_t size); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 -extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); +extern int drbd_send_sync_param(struct drbd_conf *mdev); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 79a0e042252..bdb12723585 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -784,7 +784,7 @@ int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, return ok; } -int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) +int drbd_send_sync_param(struct drbd_conf *mdev) { struct p_rs_param_95 *p; struct socket *sock; @@ -793,7 +793,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) size = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? sizeof(struct p_rs_param) - + strlen(mdev->sync_conf.verify_alg) + 1 + + strlen(mdev->tconn->net_conf->verify_alg) + 1 : apv <= 94 ? sizeof(struct p_rs_param_89) : /* apv >= 95 */ sizeof(struct p_rs_param_95); @@ -812,16 +812,25 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) /* initialize verify_alg and csums_alg */ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - p->rate = cpu_to_be32(sc->rate); - p->c_plan_ahead = cpu_to_be32(sc->c_plan_ahead); - p->c_delay_target = cpu_to_be32(sc->c_delay_target); - p->c_fill_target = cpu_to_be32(sc->c_fill_target); - p->c_max_rate = cpu_to_be32(sc->c_max_rate); + if (get_ldev(mdev)) { + p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate); + p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead); + p->c_delay_target = cpu_to_be32(mdev->ldev->dc.c_delay_target); + p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target); + p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate); + put_ldev(mdev); + } else { + p->rate = cpu_to_be32(DRBD_RATE_DEF); + p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF); + p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF); + p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF); + p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF); + } if (apv >= 88) - strcpy(p->verify_alg, mdev->sync_conf.verify_alg); + strcpy(p->verify_alg, mdev->tconn->net_conf->verify_alg); if (apv >= 89) - strcpy(p->csums_alg, mdev->sync_conf.csums_alg); + strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg); rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0); } else @@ -1043,7 +1052,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, int bits; /* may we use this feature? */ - if ((mdev->sync_conf.use_rle == 0) || + if ((mdev->tconn->net_conf->use_rle == 0) || (mdev->tconn->agreed_pro_version < 90)) return 0; @@ -1790,26 +1799,8 @@ static int drbd_release(struct gendisk *gd, fmode_t mode) static void drbd_set_defaults(struct drbd_conf *mdev) { - /* This way we get a compile error when sync_conf grows, - and we forgot to initialize it here */ - mdev->sync_conf = (struct syncer_conf) { - /* .rate = */ DRBD_RATE_DEF, - /* .after = */ DRBD_AFTER_DEF, - /* .al_extents = */ DRBD_AL_EXTENTS_DEF, - /* .verify_alg = */ {}, 0, - /* .cpu_mask = */ {}, 0, - /* .csums_alg = */ {}, 0, - /* .use_rle = */ 0, - /* .on_no_data = */ DRBD_ON_NO_DATA_DEF, - /* .c_plan_ahead = */ DRBD_C_PLAN_AHEAD_DEF, - /* .c_delay_target = */ DRBD_C_DELAY_TARGET_DEF, - /* .c_fill_target = */ DRBD_C_FILL_TARGET_DEF, - /* .c_max_rate = */ DRBD_C_MAX_RATE_DEF, - /* .c_min_rate = */ DRBD_C_MIN_RATE_DEF - }; - - /* Have to use that way, because the layout differs between - big endian and little endian */ + /* Beware! The actual layout differs + * between big endian and little endian */ mdev->state = (union drbd_state) { { .role = R_SECONDARY, .peer = R_UNKNOWN, @@ -2286,6 +2277,11 @@ struct drbd_tconn *drbd_new_tconn(const char *name) drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); + tconn->res_opts = (struct res_opts) { + {}, 0, /* cpu_mask */ + DRBD_ON_NO_DATA_DEF, /* on_no_data */ + }; + mutex_lock(&drbd_cfg_mutex); list_add_tail(&tconn->all_tconn, &drbd_tconns); mutex_unlock(&drbd_cfg_mutex); @@ -2559,10 +2555,10 @@ void drbd_free_sock(struct drbd_tconn *tconn) void drbd_free_resources(struct drbd_conf *mdev) { - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = NULL; - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = NULL; + crypto_free_hash(mdev->tconn->csums_tfm); + mdev->tconn->csums_tfm = NULL; + crypto_free_hash(mdev->tconn->verify_tfm); + mdev->tconn->verify_tfm = NULL; crypto_free_hash(mdev->tconn->cram_hmac_tfm); mdev->tconn->cram_hmac_tfm = NULL; crypto_free_hash(mdev->tconn->integrity_w_tfm); @@ -2589,7 +2585,7 @@ struct meta_data_on_disk { u32 md_size_sect; u32 al_offset; /* offset to this block */ u32 al_nr_extents; /* important for restoring the AL */ - /* `-- act_log->nr_elements <-- sync_conf.al_extents */ + /* `-- act_log->nr_elements <-- ldev->dc.al_extents */ u32 bm_offset; /* offset to the bitmap, from here */ u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ u32 la_peer_max_bio_size; /* last peer max_bio_size */ @@ -2715,7 +2711,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) for (i = UI_CURRENT; i < UI_SIZE; i++) bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); bdev->md.flags = be32_to_cpu(buffer->flags); - mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents); + bdev->dc.al_extents = be32_to_cpu(buffer->al_nr_extents); bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); spin_lock_irq(&mdev->tconn->req_lock); @@ -2727,8 +2723,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } spin_unlock_irq(&mdev->tconn->req_lock); - if (mdev->sync_conf.al_extents < 7) - mdev->sync_conf.al_extents = 127; + if (bdev->dc.al_extents < 7) + bdev->dc.al_extents = 127; err: mutex_unlock(&mdev->md_io_mutex); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ac0a175e778..18cd2ed4e8c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -53,8 +53,10 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info); int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info); int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info); int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info); int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info); int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info); int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info); @@ -66,7 +68,7 @@ int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info); int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info); int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info); int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info); -int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info); int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info); int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info); /* .dumpit */ @@ -170,7 +172,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, if (info->attrs[DRBD_NLA_CFG_CONTEXT]) { struct nlattr *nla; /* parse and validate only */ - err = drbd_cfg_context_from_attrs(NULL, info->attrs); + err = drbd_cfg_context_from_attrs(NULL, info); if (err) goto fail; @@ -616,6 +618,7 @@ static const char *from_attrs_err_to_txt(int err) { return err == -ENOMSG ? "required attribute missing" : err == -EOPNOTSUPP ? "unknown mandatory attribute" : + err == -EEXIST ? "can not change invariant setting" : "invalid attribute value"; } @@ -633,7 +636,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) memset(&parms, 0, sizeof(parms)); if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) { - err = set_role_parms_from_attrs(&parms, info->attrs); + err = set_role_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -898,24 +901,24 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int ass * failed, and 0 on success. You should call drbd_md_sync() after you called * this function. */ -static int drbd_check_al_size(struct drbd_conf *mdev) +static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc) { struct lru_cache *n, *t; struct lc_element *e; unsigned int in_use; int i; - if (!expect(mdev->sync_conf.al_extents >= DRBD_AL_EXTENTS_MIN)) - mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(dc->al_extents >= DRBD_AL_EXTENTS_MIN)) + dc->al_extents = DRBD_AL_EXTENTS_MIN; if (mdev->act_log && - mdev->act_log->nr_elements == mdev->sync_conf.al_extents) + mdev->act_log->nr_elements == dc->al_extents) return 0; in_use = 0; t = mdev->act_log; n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION, - mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); + dc->al_extents, sizeof(struct lc_element), 0); if (n == NULL) { dev_err(DEV, "Cannot allocate act_log lru!\n"); @@ -1069,6 +1072,114 @@ static void drbd_suspend_al(struct drbd_conf *mdev) dev_info(DEV, "Suspended AL updates\n"); } +int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + struct drbd_conf *mdev; + struct disk_conf *ndc; /* new disk conf */ + int err, fifo_size; + int *rs_plan_s = NULL; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; + + /* we also need a disk + * to change the options on */ + if (!get_ldev(mdev)) { + retcode = ERR_NO_DISK; + goto out; + } + +/* FIXME freeze IO, cluster wide. + * + * We should make sure no-one uses + * some half-updated struct when we + * assign it later. */ + + ndc = kmalloc(sizeof(*ndc), GFP_KERNEL); + if (!ndc) { + retcode = ERR_NOMEM; + goto fail; + } + + memcpy(ndc, &mdev->ldev->dc, sizeof(*ndc)); + err = disk_conf_from_attrs_for_change(ndc, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + } + + if (!expect(ndc->resync_rate >= 1)) + ndc->resync_rate = 1; + + /* clip to allowed range */ + if (!expect(ndc->al_extents >= DRBD_AL_EXTENTS_MIN)) + ndc->al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(ndc->al_extents <= DRBD_AL_EXTENTS_MAX)) + ndc->al_extents = DRBD_AL_EXTENTS_MAX; + + /* most sanity checks done, try to assign the new sync-after + * dependency. need to hold the global lock in there, + * to avoid a race in the dependency loop check. */ + retcode = drbd_alter_sa(mdev, ndc->resync_after); + if (retcode != NO_ERROR) + goto fail; + + fifo_size = (ndc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; + if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { + rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); + if (!rs_plan_s) { + dev_err(DEV, "kmalloc of fifo_buffer failed"); + retcode = ERR_NOMEM; + goto fail; + } + } + + if (fifo_size != mdev->rs_plan_s.size) { + kfree(mdev->rs_plan_s.values); + mdev->rs_plan_s.values = rs_plan_s; + mdev->rs_plan_s.size = fifo_size; + mdev->rs_planed = 0; + rs_plan_s = NULL; + } + + wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + drbd_al_shrink(mdev); + err = drbd_check_al_size(mdev, ndc); + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + + if (err) { + retcode = ERR_NOMEM; + goto fail; + } + + /* FIXME + * To avoid someone looking at a half-updated struct, we probably + * should have a rw-semaphor on net_conf and disk_conf. + */ + mdev->ldev->dc = *ndc; + + drbd_md_sync(mdev); + + + if (mdev->state.conn >= C_CONNECTED) + drbd_send_sync_param(mdev); + + fail: + put_ldev(mdev); + kfree(ndc); + kfree(rs_plan_s); + out: + drbd_adm_finish(info, retcode); + return 0; +} + int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) { struct drbd_conf *mdev; @@ -1111,12 +1222,29 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF; - nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF; - nbc->dc.fencing = DRBD_FENCING_DEF; - nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; - - err = disk_conf_from_attrs(&nbc->dc, info->attrs); + nbc->dc = (struct disk_conf) { + {}, 0, /* backing_dev */ + {}, 0, /* meta_dev */ + 0, /* meta_dev_idx */ + DRBD_DISK_SIZE_SECT_DEF, /* disk_size */ + DRBD_MAX_BIO_BVECS_DEF, /* max_bio_bvecs */ + DRBD_ON_IO_ERROR_DEF, /* on_io_error */ + DRBD_FENCING_DEF, /* fencing */ + DRBD_RATE_DEF, /* resync_rate */ + DRBD_AFTER_DEF, /* resync_after */ + DRBD_AL_EXTENTS_DEF, /* al_extents */ + DRBD_C_PLAN_AHEAD_DEF, /* c_plan_ahead */ + DRBD_C_DELAY_TARGET_DEF, /* c_delay_target */ + DRBD_C_FILL_TARGET_DEF, /* c_fill_target */ + DRBD_C_MAX_RATE_DEF, /* c_max_rate */ + DRBD_C_MIN_RATE_DEF, /* c_min_rate */ + 0, /* no_disk_barrier */ + 0, /* no_disk_flush */ + 0, /* no_disk_drain */ + 0, /* no_md_flush */ + }; + + err = disk_conf_from_attrs(&nbc->dc, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1267,7 +1395,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } /* Since we are diskless, fix the activity log first... */ - if (drbd_check_al_size(mdev)) { + if (drbd_check_al_size(mdev, &nbc->dc)) { retcode = ERR_NOMEM; goto force_diskless_dec; } @@ -1498,6 +1626,158 @@ out: return 0; } +static bool conn_resync_running(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (mdev->state.conn == C_SYNC_SOURCE || + mdev->state.conn == C_SYNC_TARGET || + mdev->state.conn == C_PAUSED_SYNC_S || + mdev->state.conn == C_PAUSED_SYNC_T) + return true; + } + return false; +} + +static bool conn_ov_running(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (mdev->state.conn == C_VERIFY_S || + mdev->state.conn == C_VERIFY_T) + return true; + } + return false; +} + +int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + struct drbd_tconn *tconn; + struct net_conf *new_conf = NULL; + int err; + int ovr; /* online verify running */ + int rsr; /* re-sync running */ + struct crypto_hash *verify_tfm = NULL; + struct crypto_hash *csums_tfm = NULL; + + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + tconn = adm_ctx.tconn; + + new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_conf) { + retcode = ERR_NOMEM; + goto out; + } + + /* we also need a net config + * to change the options on */ + if (!get_net_conf(tconn)) { + drbd_msg_put_info("net conf missing, try connect"); + retcode = ERR_INVALID_REQUEST; + goto out; + } + + conn_reconfig_start(tconn); + + memcpy(new_conf, tconn->net_conf, sizeof(*new_conf)); + err = net_conf_from_attrs_for_change(new_conf, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail; + } + + /* re-sync running */ + rsr = conn_resync_running(tconn); + if (rsr && strcmp(new_conf->csums_alg, tconn->net_conf->csums_alg)) { + retcode = ERR_CSUMS_RESYNC_RUNNING; + goto fail; + } + + if (!rsr && new_conf->csums_alg[0]) { + csums_tfm = crypto_alloc_hash(new_conf->csums_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(csums_tfm)) { + csums_tfm = NULL; + retcode = ERR_CSUMS_ALG; + goto fail; + } + + if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) { + retcode = ERR_CSUMS_ALG_ND; + goto fail; + } + } + + /* online verify running */ + ovr = conn_ov_running(tconn); + if (ovr) { + if (strcmp(new_conf->verify_alg, tconn->net_conf->verify_alg)) { + retcode = ERR_VERIFY_RUNNING; + goto fail; + } + } + + if (!ovr && new_conf->verify_alg[0]) { + verify_tfm = crypto_alloc_hash(new_conf->verify_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(verify_tfm)) { + verify_tfm = NULL; + retcode = ERR_VERIFY_ALG; + goto fail; + } + + if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) { + retcode = ERR_VERIFY_ALG_ND; + goto fail; + } + } + + + /* For now, use struct assignment, not pointer assignment. + * We don't have any means to determine who might still + * keep a local alias into the struct, + * so we cannot just free it and hope for the best :( + * FIXME + * To avoid someone looking at a half-updated struct, we probably + * should have a rw-semaphor on net_conf and disk_conf. + */ + *tconn->net_conf = *new_conf; + + if (!rsr) { + crypto_free_hash(tconn->csums_tfm); + tconn->csums_tfm = csums_tfm; + csums_tfm = NULL; + } + if (!ovr) { + crypto_free_hash(tconn->verify_tfm); + tconn->verify_tfm = verify_tfm; + verify_tfm = NULL; + } + + if (tconn->cstate >= C_WF_REPORT_PARAMS) + drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn))); + + fail: + crypto_free_hash(csums_tfm); + crypto_free_hash(verify_tfm); + kfree(new_conf); + put_net_conf(tconn); + conn_reconfig_done(tconn); + out: + drbd_adm_finish(info, retcode); + return 0; +} + int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) { char hmac_name[CRYPTO_MAX_ALG_NAME]; @@ -1531,33 +1811,47 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) } /* allocation not in the IO path, cqueue thread context */ - new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); + new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); if (!new_conf) { retcode = ERR_NOMEM; goto fail; } - new_conf->timeout = DRBD_TIMEOUT_DEF; - new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; - new_conf->ping_int = DRBD_PING_INT_DEF; - new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; - new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF; - new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; - new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; - new_conf->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF; - new_conf->ko_count = DRBD_KO_COUNT_DEF; - new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF; - new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF; - new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF; - new_conf->want_lose = 0; - new_conf->two_primaries = 0; - new_conf->wire_protocol = DRBD_PROT_C; - new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; - new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; - new_conf->on_congestion = DRBD_ON_CONGESTION_DEF; - new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF; - - err = net_conf_from_attrs(new_conf, info->attrs); + *new_conf = (struct net_conf) { + {}, 0, /* my_addr */ + {}, 0, /* peer_addr */ + {}, 0, /* shared_secret */ + {}, 0, /* cram_hmac_alg */ + {}, 0, /* integrity_alg */ + {}, 0, /* verify_alg */ + {}, 0, /* csums_alg */ + DRBD_PROTOCOL_DEF, /* wire_protocol */ + DRBD_CONNECT_INT_DEF, /* try_connect_int */ + DRBD_TIMEOUT_DEF, /* timeout */ + DRBD_PING_INT_DEF, /* ping_int */ + DRBD_PING_TIMEO_DEF, /* ping_timeo */ + DRBD_SNDBUF_SIZE_DEF, /* sndbuf_size */ + DRBD_RCVBUF_SIZE_DEF, /* rcvbuf_size */ + DRBD_KO_COUNT_DEF, /* ko_count */ + DRBD_MAX_BUFFERS_DEF, /* max_buffers */ + DRBD_MAX_EPOCH_SIZE_DEF, /* max_epoch_size */ + DRBD_UNPLUG_WATERMARK_DEF, /* unplug_watermark */ + DRBD_AFTER_SB_0P_DEF, /* after_sb_0p */ + DRBD_AFTER_SB_1P_DEF, /* after_sb_1p */ + DRBD_AFTER_SB_2P_DEF, /* after_sb_2p */ + DRBD_RR_CONFLICT_DEF, /* rr_conflict */ + DRBD_ON_CONGESTION_DEF, /* on_congestion */ + DRBD_CONG_FILL_DEF, /* cong_fill */ + DRBD_CONG_EXTENTS_DEF, /* cong_extents */ + 0, /* two_primaries */ + 0, /* want_lose */ + 0, /* no_cork */ + 0, /* always_asbp */ + 0, /* dry_run */ + 0, /* use_rle */ + }; + + err = net_conf_from_attrs(new_conf, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1789,7 +2083,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) tconn = adm_ctx.tconn; memset(&parms, 0, sizeof(parms)); if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) { - err = disconnect_parms_from_attrs(&parms, info->attrs); + err = disconnect_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1848,7 +2142,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) memset(&rs, 0, sizeof(struct resize_parms)); if (info->attrs[DRBD_NLA_RESIZE_PARMS]) { - err = resize_parms_from_attrs(&rs, info->attrs); + err = resize_parms_from_attrs(&rs, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1904,26 +2198,21 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) return 0; } -int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info) +int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; enum drbd_ret_code retcode; - int err; - int ovr; /* online verify running */ - int rsr; /* re-sync running */ - struct crypto_hash *verify_tfm = NULL; - struct crypto_hash *csums_tfm = NULL; - struct syncer_conf sc; cpumask_var_t new_cpu_mask; + struct drbd_tconn *tconn; int *rs_plan_s = NULL; - int fifo_size; + struct res_opts sc; + int err; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) goto fail; - mdev = adm_ctx.mdev; + tconn = adm_ctx.tconn; if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { retcode = ERR_NOMEM; @@ -1933,172 +2222,43 @@ int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info) if (((struct drbd_genlmsghdr*)info->userhdr)->flags & DRBD_GENL_F_SET_DEFAULTS) { - memset(&sc, 0, sizeof(struct syncer_conf)); - sc.rate = DRBD_RATE_DEF; - sc.after = DRBD_AFTER_DEF; - sc.al_extents = DRBD_AL_EXTENTS_DEF; + memset(&sc, 0, sizeof(struct res_opts)); sc.on_no_data = DRBD_ON_NO_DATA_DEF; - sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF; - sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF; - sc.c_fill_target = DRBD_C_FILL_TARGET_DEF; - sc.c_max_rate = DRBD_C_MAX_RATE_DEF; - sc.c_min_rate = DRBD_C_MIN_RATE_DEF; } else - memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); + sc = tconn->res_opts; - err = syncer_conf_from_attrs(&sc, info->attrs); + err = res_opts_from_attrs(&sc, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } - /* re-sync running */ - rsr = ( mdev->state.conn == C_SYNC_SOURCE || - mdev->state.conn == C_SYNC_TARGET || - mdev->state.conn == C_PAUSED_SYNC_S || - mdev->state.conn == C_PAUSED_SYNC_T ); - - if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) { - retcode = ERR_CSUMS_RESYNC_RUNNING; - goto fail; - } - - if (!rsr && sc.csums_alg[0]) { - csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(csums_tfm)) { - csums_tfm = NULL; - retcode = ERR_CSUMS_ALG; - goto fail; - } - - if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) { - retcode = ERR_CSUMS_ALG_ND; - goto fail; - } - } - - /* online verify running */ - ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T); - - if (ovr) { - if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) { - retcode = ERR_VERIFY_RUNNING; - goto fail; - } - } - - if (!ovr && sc.verify_alg[0]) { - verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(verify_tfm)) { - verify_tfm = NULL; - retcode = ERR_VERIFY_ALG; - goto fail; - } - - if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) { - retcode = ERR_VERIFY_ALG_ND; - goto fail; - } - } - /* silently ignore cpu mask on UP kernel */ if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { err = __bitmap_parse(sc.cpu_mask, 32, 0, cpumask_bits(new_cpu_mask), nr_cpu_ids); if (err) { - dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); + conn_warn(tconn, "__bitmap_parse() failed with %d\n", err); retcode = ERR_CPU_MASK_PARSE; goto fail; } } - if (!expect(sc.rate >= 1)) - sc.rate = 1; - - /* clip to allowed range */ - if (!expect(sc.al_extents >= DRBD_AL_EXTENTS_MIN)) - sc.al_extents = DRBD_AL_EXTENTS_MIN; - if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) - sc.al_extents = DRBD_AL_EXTENTS_MAX; - - /* most sanity checks done, try to assign the new sync-after - * dependency. need to hold the global lock in there, - * to avoid a race in the dependency loop check. */ - retcode = drbd_alter_sa(mdev, sc.after); - if (retcode != NO_ERROR) - goto fail; - - fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; - if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { - rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); - if (!rs_plan_s) { - dev_err(DEV, "kmalloc of fifo_buffer failed"); - retcode = ERR_NOMEM; - goto fail; - } - } - - /* ok, assign the rest of it as well. - * lock against receive_SyncParam() */ - spin_lock(&mdev->peer_seq_lock); - mdev->sync_conf = sc; - - if (!rsr) { - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = csums_tfm; - csums_tfm = NULL; - } - - if (!ovr) { - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = verify_tfm; - verify_tfm = NULL; - } - - if (fifo_size != mdev->rs_plan_s.size) { - kfree(mdev->rs_plan_s.values); - mdev->rs_plan_s.values = rs_plan_s; - mdev->rs_plan_s.size = fifo_size; - mdev->rs_planed = 0; - rs_plan_s = NULL; - } - - spin_unlock(&mdev->peer_seq_lock); - if (get_ldev(mdev)) { - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); - drbd_al_shrink(mdev); - err = drbd_check_al_size(mdev); - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); + tconn->res_opts = sc; - put_ldev(mdev); - drbd_md_sync(mdev); - - if (err) { - retcode = ERR_NOMEM; - goto fail; - } - } - - if (mdev->state.conn >= C_CONNECTED) - drbd_send_sync_param(mdev, &sc); - - if (!cpumask_equal(mdev->tconn->cpu_mask, new_cpu_mask)) { - cpumask_copy(mdev->tconn->cpu_mask, new_cpu_mask); - drbd_calc_cpu_mask(mdev->tconn); - mdev->tconn->receiver.reset_cpu_mask = 1; - mdev->tconn->asender.reset_cpu_mask = 1; - mdev->tconn->worker.reset_cpu_mask = 1; + if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) { + cpumask_copy(tconn->cpu_mask, new_cpu_mask); + drbd_calc_cpu_mask(tconn); + tconn->receiver.reset_cpu_mask = 1; + tconn->asender.reset_cpu_mask = 1; + tconn->worker.reset_cpu_mask = 1; } - kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); fail: kfree(rs_plan_s); free_cpumask_var(new_cpu_mask); - crypto_free_hash(csums_tfm); - crypto_free_hash(verify_tfm); drbd_adm_finish(info, retcode); return 0; @@ -2307,6 +2467,9 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr)) goto nla_put_failure; + if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive)) + goto nla_put_failure; + if (got_ldev) if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive)) goto nla_put_failure; @@ -2314,9 +2477,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive)) goto nla_put_failure; - if (syncer_conf_to_skb(skb, &mdev->sync_conf, exclude_sensitive)) - goto nla_put_failure; - nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); if (!nla) goto nla_put_failure; @@ -2532,7 +2692,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) /* resume from last known position, if possible */ struct start_ov_parms parms = { .ov_start_sector = mdev->ov_start_sector }; - int err = start_ov_parms_from_attrs(&parms, info->attrs); + int err = start_ov_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -2568,7 +2728,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) mdev = adm_ctx.mdev; memset(&args, 0, sizeof(args)); if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) { - err = new_c_uuid_parms_from_attrs(&args, info->attrs); + err = new_c_uuid_parms_from_attrs(&args, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 50c52712715..c8c826b2444 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -763,7 +763,7 @@ int drbd_connected(int vnr, void *p, void *data) &mdev->tconn->cstate_mutex : &mdev->own_state_mutex; - ok &= drbd_send_sync_param(mdev, &mdev->sync_conf); + ok &= drbd_send_sync_param(mdev); ok &= drbd_send_sizes(mdev, 0, 0); ok &= drbd_send_uuids(mdev); ok &= drbd_send_state(mdev); @@ -2085,7 +2085,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) int throttle = 0; /* feature disabled? */ - if (mdev->sync_conf.c_min_rate == 0) + if (mdev->ldev->dc.c_min_rate == 0) return 0; spin_lock_irq(&mdev->al_lock); @@ -2125,7 +2125,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) db = mdev->rs_mark_left[i] - rs_left; dbdt = Bit2KB(db/dt); - if (dbdt > mdev->sync_conf.c_min_rate) + if (dbdt > mdev->ldev->dc.c_min_rate) throttle = 1; } return throttle; @@ -3001,7 +3001,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size) return false; - mdev->sync_conf.rate = be32_to_cpu(p->rate); + if (get_ldev(mdev)) { + mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate); + put_ldev(mdev); + } if (apv >= 88) { if (apv == 88) { @@ -3029,10 +3032,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, p->csums_alg[SHARED_SECRET_MAX-1] = 0; } - if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) { + if (strcmp(mdev->tconn->net_conf->verify_alg, p->verify_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", - mdev->sync_conf.verify_alg, p->verify_alg); + mdev->tconn->net_conf->verify_alg, p->verify_alg); goto disconnect; } verify_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -3043,10 +3046,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, } } - if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) { + if (apv >= 89 && strcmp(mdev->tconn->net_conf->csums_alg, p->csums_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n", - mdev->sync_conf.csums_alg, p->csums_alg); + mdev->tconn->net_conf->csums_alg, p->csums_alg); goto disconnect; } csums_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -3057,37 +3060,39 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, } } - if (apv > 94) { - mdev->sync_conf.rate = be32_to_cpu(p->rate); - mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead); - mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target); - mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target); - mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate); + if (apv > 94 && get_ldev(mdev)) { + mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate); + mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead); + mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target); + mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target); + mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate); - fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; + fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); if (!rs_plan_s) { dev_err(DEV, "kmalloc of fifo_buffer failed"); + put_ldev(mdev); goto disconnect; } } + put_ldev(mdev); } spin_lock(&mdev->peer_seq_lock); /* lock against drbd_nl_syncer_conf() */ if (verify_tfm) { - strcpy(mdev->sync_conf.verify_alg, p->verify_alg); - mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1; - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = verify_tfm; + strcpy(mdev->tconn->net_conf->verify_alg, p->verify_alg); + mdev->tconn->net_conf->verify_alg_len = strlen(p->verify_alg) + 1; + crypto_free_hash(mdev->tconn->verify_tfm); + mdev->tconn->verify_tfm = verify_tfm; dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg); } if (csums_tfm) { - strcpy(mdev->sync_conf.csums_alg, p->csums_alg); - mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1; - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = csums_tfm; + strcpy(mdev->tconn->net_conf->csums_alg, p->csums_alg); + mdev->tconn->net_conf->csums_alg_len = strlen(p->csums_alg) + 1; + crypto_free_hash(mdev->tconn->csums_tfm); + mdev->tconn->csums_tfm = csums_tfm; dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); } if (fifo_size != mdev->rs_plan_s.size) { diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 11685658659..77fad527fb1 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -402,7 +402,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) rv = SS_CONNECTED_OUTDATES; else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - (mdev->sync_conf.verify_alg[0] == 0)) + (mdev->tconn->net_conf->verify_alg[0] == 0)) rv = SS_NO_VERIFY_ALG; else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && @@ -668,7 +668,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED)) ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ - if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && + if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO && (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a730520e468..005876b32f7 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -310,12 +310,12 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) goto out; - digest_size = crypto_hash_digestsize(mdev->csums_tfm); + digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { sector_t sector = peer_req->i.sector; unsigned int size = peer_req->i.size; - drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest); /* Free peer_req and pages before send. * In case we block on congestion, we could otherwise run into * some distributed deadlock, if the other side blocks on @@ -451,13 +451,13 @@ static int drbd_rs_controller(struct drbd_conf *mdev) spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */ - steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ + steps = mdev->rs_plan_s.size; /* (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */ - want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps; + want = ((mdev->ldev->dc.resync_rate * 2 * SLEEP_TIME) / HZ) * steps; } else { /* normal path */ - want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target : - sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10); + want = mdev->ldev->dc.c_fill_target ? mdev->ldev->dc.c_fill_target : + sect_in * mdev->ldev->dc.c_delay_target * HZ / (SLEEP_TIME * 10); } correction = want - mdev->rs_in_flight - mdev->rs_planed; @@ -476,7 +476,7 @@ static int drbd_rs_controller(struct drbd_conf *mdev) if (req_sect < 0) req_sect = 0; - max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ; + max_sect = (mdev->ldev->dc.c_max_rate * 2 * SLEEP_TIME) / HZ; if (req_sect > max_sect) req_sect = max_sect; @@ -492,11 +492,11 @@ static int drbd_rs_controller(struct drbd_conf *mdev) static int drbd_rs_number_requests(struct drbd_conf *mdev) { int number; - if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */ + if (mdev->rs_plan_s.size) { /* mdev->ldev->dc.c_plan_ahead */ number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; } else { - mdev->c_sync_rate = mdev->sync_conf.rate; + mdev->c_sync_rate = mdev->ldev->dc.resync_rate; number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); } @@ -619,7 +619,7 @@ next_sector: /* adjust very last sectors, in case we are oddly sized */ if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - if (mdev->tconn->agreed_pro_version >= 89 && mdev->csums_tfm) { + if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) { switch (read_for_csum(mdev, sector, size)) { case -EIO: /* Disk failure */ put_ldev(mdev); @@ -810,7 +810,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) khelper_cmd = "after-resync-target"; - if (mdev->csums_tfm && mdev->rs_total) { + if (mdev->tconn->csums_tfm && mdev->rs_total) { const unsigned long s = mdev->rs_same_csum; const unsigned long t = mdev->rs_total; const int ratio = @@ -1019,13 +1019,13 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) /* quick hack to try to avoid a race against reconfiguration. * a real fix would be much more involved, * introducing more locking mechanisms */ - if (mdev->csums_tfm) { - digest_size = crypto_hash_digestsize(mdev->csums_tfm); + if (mdev->tconn->csums_tfm) { + digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm); D_ASSERT(digest_size == di->digest_size); digest = kmalloc(digest_size, GFP_NOIO); } if (digest) { - drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest); eq = !memcmp(digest, di->digest, digest_size); kfree(digest); } @@ -1069,7 +1069,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) if (unlikely(cancel)) goto out; - digest_size = crypto_hash_digestsize(mdev->verify_tfm); + digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (!digest) { ok = 0; /* terminate the connection in case the allocation failed */ @@ -1077,7 +1077,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) } if (likely(!(peer_req->flags & EE_WAS_ERROR))) - drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest); else memset(digest, 0, digest_size); @@ -1141,10 +1141,10 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) di = peer_req->digest; if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - digest_size = crypto_hash_digestsize(mdev->verify_tfm); + digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { - drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest); D_ASSERT(digest_size == di->digest_size); eq = !memcmp(digest, di->digest, digest_size); @@ -1319,9 +1319,9 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev) struct drbd_conf *odev = mdev; while (1) { - if (odev->sync_conf.after == -1) + if (odev->ldev->dc.resync_after == -1) return 1; - odev = minor_to_mdev(odev->sync_conf.after); + odev = minor_to_mdev(odev->ldev->dc.resync_after); if (!expect(odev)) return 1; if ((odev->state.conn >= C_SYNC_SOURCE && @@ -1408,11 +1408,11 @@ static int sync_after_error(struct drbd_conf *mdev, int o_minor) return ERR_SYNC_AFTER_CYCLE; /* dependency chain ends here, no cycles. */ - if (odev->sync_conf.after == -1) + if (odev->ldev->dc.resync_after == -1) return NO_ERROR; /* follow the dependency chain */ - odev = minor_to_mdev(odev->sync_conf.after); + odev = minor_to_mdev(odev->ldev->dc.resync_after); } } @@ -1424,7 +1424,7 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na) write_lock_irq(&global_state_lock); retcode = sync_after_error(mdev, na); if (retcode == NO_ERROR) { - mdev->sync_conf.after = na; + mdev->ldev->dc.resync_after = na; do { changes = _drbd_pause_after(mdev); changes |= _drbd_resume_next(mdev); @@ -1637,7 +1637,7 @@ int drbd_worker(struct drbd_thread *thi) struct drbd_work *w = NULL; struct drbd_conf *mdev; LIST_HEAD(work_list); - int minor, intr = 0; + int vnr, intr = 0; while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); @@ -1722,7 +1722,7 @@ int drbd_worker(struct drbd_thread *thi) spin_unlock_irq(&tconn->data.work.q_lock); drbd_thread_stop(&tconn->receiver); - idr_for_each_entry(&tconn->volumes, mdev, minor) { + idr_for_each_entry(&tconn->volumes, mdev, vnr) { D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); /* _drbd_set_state only uses stop_nowait. * wait here for the exiting receiver. */ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index a07d69279b1..938e8560a83 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -102,66 +102,73 @@ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, - __u64_field(1, GENLA_F_MANDATORY, disk_size) - __str_field(2, GENLA_F_REQUIRED, backing_dev, 128) - __str_field(3, GENLA_F_REQUIRED, meta_dev, 128) - __u32_field(4, GENLA_F_REQUIRED, meta_dev_idx) - __u32_field(5, GENLA_F_MANDATORY, max_bio_bvecs) + __str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, backing_dev, 128) + __str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev, 128) + __u32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev_idx) + + /* use the resize command to try and change the disk_size */ + __u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT, disk_size) + /* we could change the max_bio_bvecs, + * but it won't propagate through the stack */ + __u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT, max_bio_bvecs) + __u32_field(6, GENLA_F_MANDATORY, on_io_error) __u32_field(7, GENLA_F_MANDATORY, fencing) - __flg_field(8, GENLA_F_MANDATORY, no_disk_barrier) - __flg_field(9, GENLA_F_MANDATORY, no_disk_flush) - __flg_field(10, GENLA_F_MANDATORY, no_disk_drain) - __flg_field(11, GENLA_F_MANDATORY, no_md_flush) - __flg_field(12, GENLA_F_MANDATORY, use_bmbv) + + __u32_field(8, GENLA_F_MANDATORY, resync_rate) + __u32_field(9, GENLA_F_MANDATORY, resync_after) + __u32_field(10, GENLA_F_MANDATORY, al_extents) + __u32_field(11, GENLA_F_MANDATORY, c_plan_ahead) + __u32_field(12, GENLA_F_MANDATORY, c_delay_target) + __u32_field(13, GENLA_F_MANDATORY, c_fill_target) + __u32_field(14, GENLA_F_MANDATORY, c_max_rate) + __u32_field(15, GENLA_F_MANDATORY, c_min_rate) + + __flg_field(16, GENLA_F_MANDATORY, no_disk_barrier) + __flg_field(17, GENLA_F_MANDATORY, no_disk_flush) + __flg_field(18, GENLA_F_MANDATORY, no_disk_drain) + __flg_field(19, GENLA_F_MANDATORY, no_md_flush) + ) -GENL_struct(DRBD_NLA_SYNCER_CONF, 4, syncer_conf, - __u32_field(1, GENLA_F_MANDATORY, rate) - __u32_field(2, GENLA_F_MANDATORY, after) - __u32_field(3, GENLA_F_MANDATORY, al_extents) - __str_field(4, GENLA_F_MANDATORY, cpu_mask, 32) - __str_field(5, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field(6, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __flg_field(7, GENLA_F_MANDATORY, use_rle) - __u32_field(8, GENLA_F_MANDATORY, on_no_data) - __u32_field(9, GENLA_F_MANDATORY, c_plan_ahead) - __u32_field(10, GENLA_F_MANDATORY, c_delay_target) - __u32_field(11, GENLA_F_MANDATORY, c_fill_target) - __u32_field(12, GENLA_F_MANDATORY, c_max_rate) - __u32_field(13, GENLA_F_MANDATORY, c_min_rate) +GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, + __str_field(1, GENLA_F_MANDATORY, cpu_mask, 32) + __u32_field(2, GENLA_F_MANDATORY, on_no_data) ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, - __str_field(1, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + __bin_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, my_addr, 128) + __bin_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, peer_addr, 128) + __str_field(3, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field(2, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - __str_field(3, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field(4, GENLA_F_REQUIRED, my_addr, 128) - __str_field(5, GENLA_F_REQUIRED, peer_addr, 128) - __u32_field(6, GENLA_F_REQUIRED, wire_protocol) - __u32_field(7, GENLA_F_MANDATORY, try_connect_int) - __u32_field(8, GENLA_F_MANDATORY, timeout) - __u32_field(9, GENLA_F_MANDATORY, ping_int) - __u32_field(10, GENLA_F_MANDATORY, ping_timeo) - __u32_field(11, GENLA_F_MANDATORY, sndbuf_size) - __u32_field(12, GENLA_F_MANDATORY, rcvbuf_size) - __u32_field(13, GENLA_F_MANDATORY, ko_count) - __u32_field(14, GENLA_F_MANDATORY, max_buffers) - __u32_field(15, GENLA_F_MANDATORY, max_epoch_size) - __u32_field(16, GENLA_F_MANDATORY, unplug_watermark) - __u32_field(17, GENLA_F_MANDATORY, after_sb_0p) - __u32_field(18, GENLA_F_MANDATORY, after_sb_1p) - __u32_field(19, GENLA_F_MANDATORY, after_sb_2p) - __u32_field(20, GENLA_F_MANDATORY, rr_conflict) - __u32_field(21, GENLA_F_MANDATORY, on_congestion) - __u32_field(22, GENLA_F_MANDATORY, cong_fill) - __u32_field(23, GENLA_F_MANDATORY, cong_extents) - __flg_field(24, GENLA_F_MANDATORY, two_primaries) - __flg_field(25, GENLA_F_MANDATORY, want_lose) - __flg_field(26, GENLA_F_MANDATORY, no_cork) - __flg_field(27, GENLA_F_MANDATORY, always_asbp) - __flg_field(28, GENLA_F_MANDATORY, dry_run) + __str_field(4, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field(5, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field(8, GENLA_F_MANDATORY, wire_protocol) + __u32_field(9, GENLA_F_MANDATORY, try_connect_int) + __u32_field(10, GENLA_F_MANDATORY, timeout) + __u32_field(11, GENLA_F_MANDATORY, ping_int) + __u32_field(12, GENLA_F_MANDATORY, ping_timeo) + __u32_field(13, GENLA_F_MANDATORY, sndbuf_size) + __u32_field(14, GENLA_F_MANDATORY, rcvbuf_size) + __u32_field(15, GENLA_F_MANDATORY, ko_count) + __u32_field(16, GENLA_F_MANDATORY, max_buffers) + __u32_field(17, GENLA_F_MANDATORY, max_epoch_size) + __u32_field(18, GENLA_F_MANDATORY, unplug_watermark) + __u32_field(19, GENLA_F_MANDATORY, after_sb_0p) + __u32_field(20, GENLA_F_MANDATORY, after_sb_1p) + __u32_field(21, GENLA_F_MANDATORY, after_sb_2p) + __u32_field(22, GENLA_F_MANDATORY, rr_conflict) + __u32_field(23, GENLA_F_MANDATORY, on_congestion) + __u32_field(24, GENLA_F_MANDATORY, cong_fill) + __u32_field(25, GENLA_F_MANDATORY, cong_extents) + __flg_field(26, GENLA_F_MANDATORY, two_primaries) + __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) + __flg_field(28, GENLA_F_MANDATORY, no_cork) + __flg_field(29, GENLA_F_MANDATORY, always_asbp) + __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) + __flg_field(31, GENLA_F_MANDATORY, use_rle) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, @@ -270,11 +277,10 @@ GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) - /* operates on replication links */ -GENL_op(DRBD_ADM_SYNCER, 9, - GENL_doit(drbd_adm_syncer), +GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, + GENL_doit(drbd_adm_resource_opts), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, GENLA_F_MANDATORY) ) GENL_op( @@ -284,16 +290,28 @@ GENL_op( GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) ) +GENL_op( + DRBD_ADM_CHG_NET_OPTS, 29, + GENL_doit(drbd_adm_net_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) +) + GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) - /* operates on minors */ GENL_op(DRBD_ADM_ATTACH, 12, GENL_doit(drbd_adm_attach), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED) ) +GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28, + GENL_doit(drbd_adm_disk_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_OPTS, GENLA_F_REQUIRED) +) + GENL_op( DRBD_ADM_RESIZE, 13, GENL_doit(drbd_adm_resize), @@ -301,7 +319,6 @@ GENL_op( GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY) ) - /* operates on all volumes within a resource */ GENL_op( DRBD_ADM_PRIMARY, 14, GENL_doit(drbd_adm_set_role), diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 22920a8af4e..659a8eb3883 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -166,5 +166,7 @@ #define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX #define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF +#define DRBD_PROTOCOL_DEF DRBD_PROT_C + #undef RANGE #endif diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index c8c67239f61..e458282a372 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -190,11 +190,12 @@ static struct nlattr *nested_attr_tb[128]; #undef GENL_struct #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ - /* static, potentially unused */ \ -int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[]) \ +/* *_from_attrs functions are static, but potentially unused */ \ +static int __ ## s_name ## _from_attrs(struct s_name *s, \ + struct genl_info *info, bool exclude_invariants) \ { \ const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \ - struct nlattr *tla = tb[tag_number]; \ + struct nlattr *tla = info->attrs[tag_number]; \ struct nlattr **ntb = nested_attr_tb; \ struct nlattr *nla; \ int err; \ @@ -211,33 +212,49 @@ int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[]) \ \ s_fields \ return 0; \ -} +} __attribute__((unused)) \ +static int s_name ## _from_attrs(struct s_name *s, \ + struct genl_info *info) \ +{ \ + return __ ## s_name ## _from_attrs(s, info, false); \ +} __attribute__((unused)) \ +static int s_name ## _from_attrs_for_change(struct s_name *s, \ + struct genl_info *info) \ +{ \ + return __ ## s_name ## _from_attrs(s, info, true); \ +} __attribute__((unused)) \ -#undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) \ nla = ntb[__nla_type(attr_nr)]; \ if (nla) { \ - if (s) \ - s->name = __get(nla); \ - DPRINT_FIELD("<<", nla_type, name, s, nla); \ + if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + pr_info("<< must not change invariant attr: %s\n", #name); \ + return -EEXIST; \ + } \ + assignment; \ + } else if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + /* attribute missing from payload, */ \ + /* which was expected */ \ } else if ((attr_flag) & GENLA_F_REQUIRED) { \ pr_info("<< missing attr: %s\n", #name); \ return -ENOMSG; \ } +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + __assign(attr_nr, attr_flag, name, nla_type, type, \ + if (s) \ + s->name = __get(nla); \ + DPRINT_FIELD("<<", nla_type, name, s, nla)) + /* validate_nla() already checked nla_len <= maxlen appropriately. */ #undef __array #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ - nla = ntb[__nla_type(attr_nr)]; \ - if (nla) { \ + __assign(attr_nr, attr_flag, name, nla_type, type, \ if (s) \ s->name ## _len = \ __get(s->name, nla, maxlen); \ - DPRINT_ARRAY("<<", nla_type, name, s, nla); \ - } else if ((attr_flag) & GENLA_F_REQUIRED) { \ - pr_info("<< missing attr: %s\n", #name); \ - return -ENOMSG; \ - } \ + DPRINT_ARRAY("<<", nla_type, name, s, nla)) #include GENL_MAGIC_INCLUDE_FILE diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 745ebfd6c7e..9a605b9ee83 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -59,12 +59,20 @@ enum { GENLA_F_MANDATORY = 1 << 14, GENLA_F_REQUIRED = 1 << 15, - /* This will not be present in the __u16 .nla_type, but can be - * triggered on in _to_skb, to exclude "sensitive" - * information from broadcasts, or on unpriviledged get requests. - * This is useful because genetlink multicast groups can be listened in - * on by anyone. */ + /* Below will not be present in the __u16 .nla_type, but can be + * triggered on in _to_skb resp. _from_attrs */ + + /* To exclude "sensitive" information from broadcasts, or on + * unpriviledged get requests. This is useful because genetlink + * multicast groups can be listened in on by anyone. */ GENLA_F_SENSITIVE = 1 << 16, + + /* INVARIAN options cannot be changed at runtime. + * Useful to share an attribute policy and struct definition, + * between some "create" and "change" commands, + * but disallow certain fields to be changed online. + */ + GENLA_F_INVARIANT = 1 << 17, }; #define __nla_type(x) ((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK)) -- cgit v1.2.3-70-g09d2 From 0c8e36d9b843be56e4e43d4ef3c3eb6a97205599 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 30 Mar 2011 16:00:17 +0200 Subject: drbd: Introduce protocol version 100 headers The 8 byte header finally becomes too small. With the protocol 100 header we have 16 bit for the volume number, proper 32 bit for the data length, and 32 bit for further extensions in the future. Previous versions of drbd are using version 80 headers for all packets short enough for protocol 80. They support both header versions in worker context, but only version 80 headers in asynchronous context. For backwards compatibility, continue to use version 80 headers for short packets before protocol version 100. From protocol version 100 on, use the same header version for all packets. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 ++++++++ drivers/block/drbd/drbd_main.c | 32 ++++++++++++++++++++++++++------ drivers/block/drbd/drbd_nl.c | 3 +-- drivers/block/drbd/drbd_receiver.c | 14 ++++++++++++-- include/linux/drbd.h | 1 + include/linux/drbd_genl.h | 2 -- include/linux/drbd_limits.h | 2 ++ 7 files changed, 50 insertions(+), 12 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6d55bb75a08..bf1aad68338 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -307,6 +307,14 @@ struct p_header95 { u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. */ } __packed; +struct p_header100 { + u32 magic; + u16 volume; + u16 command; + u32 length; + u32 pad; +} __packed; + extern unsigned int drbd_header_size(struct drbd_tconn *tconn); /* these defines must not be changed without changing the protocol version */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b9dcc50135c..5d9112cefcd 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -698,9 +698,15 @@ void drbd_thread_current_set_cpu(struct drbd_thread *thi) */ unsigned int drbd_header_size(struct drbd_tconn *tconn) { - BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95)); - BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8)); - return sizeof(struct p_header80); + if (tconn->agreed_pro_version >= 100) { + BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8)); + return sizeof(struct p_header100); + } else { + BUILD_BUG_ON(sizeof(struct p_header80) != + sizeof(struct p_header95)); + BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8)); + return sizeof(struct p_header80); + } } static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size) @@ -719,10 +725,24 @@ static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, return sizeof(struct p_header95); } -static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, void *buffer, - enum drbd_packet cmd, int size) +static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd, + int size, int vnr) +{ + h->magic = cpu_to_be32(DRBD_MAGIC_100); + h->volume = cpu_to_be16(vnr); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be32(size); + h->pad = 0; + return sizeof(struct p_header100); +} + +static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, + void *buffer, enum drbd_packet cmd, int size) { - if (tconn->agreed_pro_version >= 95) + if (tconn->agreed_pro_version >= 100) + return prepare_header100(buffer, cmd, size, vnr); + else if (tconn->agreed_pro_version >= 95 && + size > DRBD_MAX_SIZE_H80_PACKET) return prepare_header95(buffer, cmd, size); else return prepare_header80(buffer, cmd, size); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d9bb1a5c756..0f52b88719c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2833,8 +2833,7 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) retcode = ERR_INVALID_REQUEST; goto out; } - /* FIXME we need a define here */ - if (adm_ctx.volume >= 256) { + if (adm_ctx.volume > DRBD_VOLUME_MAX) { drbd_msg_put_info("requested volume id out of range"); retcode = ERR_INVALID_REQUEST; goto out; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7e0ab2246fb..311b95453cb 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -983,8 +983,18 @@ static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_i { unsigned int header_size = drbd_header_size(tconn); - if (header_size == sizeof(struct p_header95) && - *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { + if (header_size == sizeof(struct p_header100) && + *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) { + struct p_header100 *h = header; + if (h->pad != 0) { + conn_err(tconn, "Header padding is not zero\n"); + return -EINVAL; + } + pi->vnr = be16_to_cpu(h->volume); + pi->cmd = be16_to_cpu(h->command); + pi->size = be32_to_cpu(h->length); + } else if (header_size == sizeof(struct p_header95) && + *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { struct p_header95 *h = header; pi->cmd = be16_to_cpu(h->command); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 60d30881909..fe8d6ba31bc 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -341,6 +341,7 @@ enum drbd_timeout_flag { #define DRBD_MAGIC 0x83740267 #define DRBD_MAGIC_BIG 0x835a +#define DRBD_MAGIC_100 0x8620ec20 /* how I came up with this magic? * base64 decode "actlog==" ;) */ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 938e8560a83..10144d546a6 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -95,8 +95,6 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, * and/or the replication group (aka resource) name, * and the volume id within the resource. */ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, - /* currently only 256 volumes per group, - * but maybe we still change that */ __u32_field(1, GENLA_F_MANDATORY, ctx_volume) __str_field(2, GENLA_F_MANDATORY, ctx_conn_name, 128) ) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 659a8eb3883..7f5149bef70 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -19,6 +19,8 @@ #define DRBD_MINOR_COUNT_MAX 256 #define DRBD_MINOR_COUNT_DEF 32 +#define DRBD_VOLUME_MAX 65535 + #define DRBD_DIALOG_REFRESH_MIN 0 #define DRBD_DIALOG_REFRESH_MAX 600 -- cgit v1.2.3-70-g09d2 From d8cd289dbe69ce9b8115d6f200ceff657e5dafa0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 3 May 2011 12:27:11 +0200 Subject: drbd: Remove left-over unused define Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/drbd_limits.h') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 7f5149bef70..bcebb016fda 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -170,5 +170,4 @@ #define DRBD_PROTOCOL_DEF DRBD_PROT_C -#undef RANGE #endif -- cgit v1.2.3-70-g09d2 From 7bac3e6f7e74993475a94487effe05dc1f68bdc7 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 6 May 2011 17:50:57 +0200 Subject: drbd: Also define the default values of boolean flags in a single place Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 16 ++++++++-------- include/linux/drbd_limits.h | 10 ++++++++++ 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 6632d10f1ee..02647dc8c67 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -122,10 +122,10 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(14, GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) __u32_field_def(15, GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) - __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, 1) - __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 1) - __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, 1) - __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, 1) + __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) + __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) + __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) + __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, @@ -160,12 +160,12 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __u32_field_def(23, GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) __u32_field_def(24, GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) - __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, 0) + __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) - __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, 1) - __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, 0) + __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) + __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) - __flg_field_def(31, GENLA_F_MANDATORY, use_rle, 0) + __flg_field_def(31, GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index bcebb016fda..3d3e2d5125c 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -170,4 +170,14 @@ #define DRBD_PROTOCOL_DEF DRBD_PROT_C +#define DRBD_DISK_BARRIER_DEF 0 +#define DRBD_DISK_FLUSHES_DEF 1 +#define DRBD_DISK_DRAIN_DEF 1 +#define DRBD_MD_FLUSHES_DEF 1 +#define DRBD_TCP_CORK_DEF 1 + +#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 +#define DRBD_ALWAYS_ASBP_DEF 0 +#define DRBD_USE_RLE_DEF 0 + #endif -- cgit v1.2.3-70-g09d2 From 6394b9358e6187414b7a6de7ba2c681ee4a790ac Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 May 2011 14:29:52 +0200 Subject: drbd: Refer to resync-rate consistently throughout the code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd_genl.h | 2 +- include/linux/drbd_limits.h | 7 ++++--- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index fa36757ffc4..8026adacd3d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -389,21 +389,21 @@ struct p_barrier_ack { } __packed; struct p_rs_param { - u32 rate; + u32 resync_rate; /* Since protocol version 88 and higher. */ char verify_alg[0]; } __packed; struct p_rs_param_89 { - u32 rate; + u32 resync_rate; /* protocol version 89: */ char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; } __packed; struct p_rs_param_95 { - u32 rate; + u32 resync_rate; char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; u32 c_plan_ahead; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 86c8bc5ac60..26d7763d525 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -888,14 +888,14 @@ int drbd_send_sync_param(struct drbd_conf *mdev) if (get_ldev(mdev)) { dc = rcu_dereference(mdev->ldev->disk_conf); - p->rate = cpu_to_be32(dc->resync_rate); + p->resync_rate = cpu_to_be32(dc->resync_rate); p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead); p->c_delay_target = cpu_to_be32(dc->c_delay_target); p->c_fill_target = cpu_to_be32(dc->c_fill_target); p->c_max_rate = cpu_to_be32(dc->c_max_rate); put_ldev(mdev); } else { - p->rate = cpu_to_be32(DRBD_RATE_DEF); + p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF); p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF); p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF); p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e4e8f8a408d..684f7954272 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3221,7 +3221,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) old_disk_conf = mdev->ldev->disk_conf; *new_disk_conf = *old_disk_conf; - new_disk_conf->resync_rate = be32_to_cpu(p->rate); + new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate); } if (apv >= 88) { diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 6aece551d87..778708d9293 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -113,7 +113,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(6, GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) - __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RATE_DEF) + __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_AFTER_DEF) __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 3d3e2d5125c..48339ae69d5 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -98,10 +98,11 @@ /* syncer { */ /* FIXME allow rate to be zero? */ -#define DRBD_RATE_MIN 1 +#define DRBD_RESYNC_RATE_MIN 1 /* channel bonding 10 GbE, or other hardware */ -#define DRBD_RATE_MAX (4 << 20) -#define DRBD_RATE_DEF 250 /* kb/second */ +#define DRBD_RESYNC_RATE_MAX (4 << 20) +#define DRBD_RESYNC_RATE_DEF 250 +#define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */ /* less than 7 would hit performance unnecessarily. * 919 slots context information per transaction, -- cgit v1.2.3-70-g09d2 From 95f8efd08bcce65df994049a292b94e56c7ada67 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 12 May 2011 11:15:34 +0200 Subject: drbd: Fix the upper limit of resync-after The 32-bit resync_after netlink field takes a device minor number as parameter, which is no longer limited to 255. We cannot statically verify which device numbers are valid, so set the ummer limit to the highest possible signed 32-bit integer. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_nl.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 26 +++++++++++++------------- include/linux/drbd.h | 4 ++-- include/linux/drbd_genl.h | 2 +- include/linux/drbd_limits.h | 7 ++++--- 6 files changed, 24 insertions(+), 23 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8026adacd3d..e1672284076 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1408,8 +1408,8 @@ extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); /* drbd_worker.c */ extern int drbd_worker(struct drbd_thread *thi); -enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor); -void drbd_sync_after_changed(struct drbd_conf *mdev); +enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor); +void drbd_resync_after_changed(struct drbd_conf *mdev); extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side); extern void resume_next_sg(struct drbd_conf *mdev); extern void suspend_other_sg(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9a82306adf9..74c27f1507f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1183,10 +1183,10 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } write_lock_irq(&global_state_lock); - retcode = drbd_sync_after_valid(mdev, new_disk_conf->resync_after); + retcode = drbd_resync_after_valid(mdev, new_disk_conf->resync_after); if (retcode == NO_ERROR) { rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); - drbd_sync_after_changed(mdev); + drbd_resync_after_changed(mdev); } write_unlock_irq(&global_state_lock); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ec8f4245ef9..6410c55831e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -57,7 +57,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel); /* About the global_state_lock Each state transition on an device holds a read lock. In case we have - to evaluate the sync after dependencies, we grab a write lock, because + to evaluate the resync after dependencies, we grab a write lock, because we need stable states on all devices for that. */ rwlock_t global_state_lock; @@ -1340,17 +1340,17 @@ int w_restart_disk_io(struct drbd_work *w, int cancel) static int _drbd_may_sync_now(struct drbd_conf *mdev) { struct drbd_conf *odev = mdev; - int ra; + int resync_after; while (1) { if (!odev->ldev) return 1; rcu_read_lock(); - ra = rcu_dereference(odev->ldev->disk_conf)->resync_after; + resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; rcu_read_unlock(); - if (ra == -1) + if (resync_after == -1) return 1; - odev = minor_to_mdev(ra); + odev = minor_to_mdev(resync_after); if (!expect(odev)) return 1; if ((odev->state.conn >= C_SYNC_SOURCE && @@ -1426,36 +1426,36 @@ void suspend_other_sg(struct drbd_conf *mdev) } /* caller must hold global_state_lock */ -enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor) +enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor) { struct drbd_conf *odev; - int ra; + int resync_after; if (o_minor == -1) return NO_ERROR; if (o_minor < -1 || minor_to_mdev(o_minor) == NULL) - return ERR_SYNC_AFTER; + return ERR_RESYNC_AFTER; /* check for loops */ odev = minor_to_mdev(o_minor); while (1) { if (odev == mdev) - return ERR_SYNC_AFTER_CYCLE; + return ERR_RESYNC_AFTER_CYCLE; rcu_read_lock(); - ra = rcu_dereference(odev->ldev->disk_conf)->resync_after; + resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; rcu_read_unlock(); /* dependency chain ends here, no cycles. */ - if (ra == -1) + if (resync_after == -1) return NO_ERROR; /* follow the dependency chain */ - odev = minor_to_mdev(ra); + odev = minor_to_mdev(resync_after); } } /* caller must hold global_state_lock */ -void drbd_sync_after_changed(struct drbd_conf *mdev) +void drbd_resync_after_changed(struct drbd_conf *mdev) { int changes; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 6c7c85d8fc4..05063e6db81 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -130,8 +130,8 @@ enum drbd_ret_code { ERR_INTR = 129, /* EINTR */ ERR_RESIZE_RESYNC = 130, ERR_NO_PRIMARY = 131, - ERR_SYNC_AFTER = 132, - ERR_SYNC_AFTER_CYCLE = 133, + ERR_RESYNC_AFTER = 132, + ERR_RESYNC_AFTER_CYCLE = 133, ERR_PAUSE_IS_SET = 134, ERR_PAUSE_IS_CLEAR = 135, ERR_PACKET_NR = 137, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 67c816c0fc2..a59466f7f66 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -114,7 +114,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) - __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_AFTER_DEF) + __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_RESYNC_AFTER_DEF) __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) __u32_field_def(12, GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 48339ae69d5..c4a8f0fef7b 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -113,9 +113,10 @@ #define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 -#define DRBD_AFTER_MIN -1 -#define DRBD_AFTER_MAX 255 -#define DRBD_AFTER_DEF -1 +#define DRBD_RESYNC_AFTER_MIN -1 +#define DRBD_RESYNC_AFTER_MAX (1<<30) +#define DRBD_RESYNC_AFTER_DEF -1 +#define DRBD_RESYNC_AFTER_SCALE '1' /* } */ -- cgit v1.2.3-70-g09d2 From c5482bbd9607bf38cbc952eacaa429e6ba3160a0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 May 2011 14:44:55 +0200 Subject: drbd: Rename DISK_SIZE_SECT -> DISK_SIZE We don't have the units in constant names in other places, either. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index c4a8f0fef7b..8f8bbea545e 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -125,9 +125,10 @@ * the upper limit with 64bit kernel, enough ram and flexible meta data * is 1 PiB, currently. */ /* DRBD_MAX_SECTORS */ -#define DRBD_DISK_SIZE_SECT_MIN 0 -#define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40)) -#define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ +#define DRBD_DISK_SIZE_MIN 0 +#define DRBD_DISK_SIZE_MAX (16 * (2LLU << 30)) +#define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */ +#define DRBD_DISK_SIZE_SCALE 's' /* sectors */ #define DRBD_ON_IO_ERROR_DEF EP_DETACH #define DRBD_FENCING_DEF FP_DONT_CARE -- cgit v1.2.3-70-g09d2 From 309f0b70ab789bf85c5f5f32dbc466d42f024747 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 13 May 2011 01:24:14 +0200 Subject: drbd: Use more generic constant names These constants are useful for the same purpose in more than one place. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 8f8bbea545e..3627f760966 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -113,10 +113,10 @@ #define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 -#define DRBD_RESYNC_AFTER_MIN -1 -#define DRBD_RESYNC_AFTER_MAX (1<<30) -#define DRBD_RESYNC_AFTER_DEF -1 -#define DRBD_RESYNC_AFTER_SCALE '1' +#define DRBD_MINOR_NUMBER_MIN -1 +#define DRBD_MINOR_NUMBER_MAX (1<<30) +#define DRBD_MINOR_NUMBER_DEF -1 +#define DRBD_MINOR_NUMBER_SCALE '1' /* } */ -- cgit v1.2.3-70-g09d2 From f03c254961cce65ee2b21c4beccb6975b6f9d308 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 20 Jun 2011 22:21:19 +0200 Subject: drbd: allow ping-timeout of up to 30 seconds Allow up to 300 centi-seconds to be configured for the "ping timeout". There may be setups where heavy congestion, huge buffers, and asymmetric bandwidth limitations may need a "huge" ping-timeout as work-around for "spurious connection loss" problems. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/drbd_limits.h') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 3627f760966..82db83410f0 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -62,7 +62,7 @@ /* timeout for the ping packets.*/ #define DRBD_PING_TIMEO_MIN 1 -#define DRBD_PING_TIMEO_MAX 100 +#define DRBD_PING_TIMEO_MAX 300 #define DRBD_PING_TIMEO_DEF 5 /* max number of write requests between write barriers */ -- cgit v1.2.3-70-g09d2 From d942ae44537669418a7cbfd916531d30513dbca8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 31 May 2011 13:07:24 +0200 Subject: drbd: Fixes from the 8.3 development branch * commit 'ae57a0a': drbd: Only print sanitize state's warnings, if the state change happens drbd: we should write meta data updates with FLUSH FUA drbd: fix limit define, we support 1 PiByte now drbd: fix log message argument order drbd: Typo in user-visible message. drbd: Make "(rcv|snd)buf-size" and "ping-timeout" available for the proxy, too. drbd: Allow keywords to be used in multiple config sections. drbd: fix typos in comments. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 ++++---- drivers/block/drbd/drbd_state.c | 56 ++++++++++++++++++++++++++++++----------- include/linux/drbd_limits.h | 2 +- 3 files changed, 47 insertions(+), 21 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index de42c7cf7ca..1d71b3a3586 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -372,11 +372,11 @@ struct p_connection_features { u32 protocol_max; /* should be more than enough for future enhancements - * for now, feature_flags and the reserverd array shall be zero. + * for now, feature_flags and the reserved array shall be zero. */ u32 _pad; - u64 reserverd[7]; + u64 reserved[7]; } __packed; struct p_barrier { @@ -914,7 +914,7 @@ struct drbd_conf { atomic_t ap_bio_cnt; /* Requests we need to complete */ atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ - atomic_t unacked_cnt; /* Need to send replys for */ + atomic_t unacked_cnt; /* Need to send replies for */ atomic_t local_cnt; /* Waiting for local completion */ /* Interval tree of pending local requests */ @@ -2153,7 +2153,7 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) /* disk state is stable as well. */ break; - /* no new io accepted during tansitional states */ + /* no new io accepted during transitional states */ case D_ATTACHING: case D_FAILED: case D_NEGOTIATING: @@ -2217,7 +2217,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev) /* we wait here * as long as the device is suspended * until the bitmap is no longer on the fly during connection - * handshake as long as we would exeed the max_buffer limit. + * handshake as long as we would exceed the max_buffer limit. * * to avoid races with the reconnect code, * we need to atomic_inc within the spinlock. */ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 8c9d0348736..2cf69b25f1e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -37,6 +37,15 @@ struct after_state_chg_work { struct completion *done; }; +enum sanitize_state_warnings { + NO_WARNING, + ABORTED_ONLINE_VERIFY, + ABORTED_RESYNC, + CONNECTION_LOST_NEGOTIATING, + IMPLICITLY_UPGRADED_DISK, + IMPLICITLY_UPGRADED_PDSK, +}; + static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); @@ -44,7 +53,7 @@ static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, - const char **warn_sync_abort); + enum sanitize_state_warnings *warn); static inline bool is_susp(union drbd_state s) { @@ -656,6 +665,21 @@ is_valid_transition(union drbd_state os, union drbd_state ns) return rv; } +static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn) +{ + static const char *msg_table[] = { + [NO_WARNING] = "", + [ABORTED_ONLINE_VERIFY] = "Online-verify aborted.", + [ABORTED_RESYNC] = "Resync aborted.", + [CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!", + [IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk", + [IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk", + }; + + if (warn != NO_WARNING) + dev_warn(DEV, "%s\n", msg_table[warn]); +} + /** * sanitize_state() - Resolves implicitly necessary additional changes to a state transition * @mdev: DRBD device. @@ -667,11 +691,14 @@ is_valid_transition(union drbd_state os, union drbd_state ns) * to D_UNKNOWN. This rule and many more along those lines are in this function. */ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, - const char **warn_sync_abort) + enum sanitize_state_warnings *warn) { enum drbd_fencing_p fp; enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; + if (warn) + *warn = NO_WARNING; + fp = FP_DONT_CARE; if (get_ldev(mdev)) { rcu_read_lock(); @@ -695,10 +722,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state /* An implication of the disk states onto the connection state */ /* Abort resync if a disk fails/detaches */ if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { - if (warn_sync_abort) - *warn_sync_abort = - ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ? - "Online-verify" : "Resync"; + if (warn) + *warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ? + ABORTED_ONLINE_VERIFY : ABORTED_RESYNC; ns.conn = C_CONNECTED; } @@ -709,7 +735,8 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns.disk = mdev->new_state_tmp.disk; ns.pdsk = mdev->new_state_tmp.pdsk; } else { - dev_alert(DEV, "Connection lost while negotiating, no data!\n"); + if (warn) + *warn = CONNECTION_LOST_NEGOTIATING; ns.disk = D_DISKLESS; ns.pdsk = D_UNKNOWN; } @@ -791,16 +818,16 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns.disk = disk_max; if (ns.disk < disk_min) { - dev_warn(DEV, "Implicitly set disk from %s to %s\n", - drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); + if (warn) + *warn = IMPLICITLY_UPGRADED_DISK; ns.disk = disk_min; } if (ns.pdsk > pdsk_max) ns.pdsk = pdsk_max; if (ns.pdsk < pdsk_min) { - dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", - drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); + if (warn) + *warn = IMPLICITLY_UPGRADED_PDSK; ns.pdsk = pdsk_min; } @@ -875,12 +902,12 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, { union drbd_state os; enum drbd_state_rv rv = SS_SUCCESS; - const char *warn_sync_abort = NULL; + enum sanitize_state_warnings ssw; struct after_state_chg_work *ascw; os = drbd_read_state(mdev); - ns = sanitize_state(mdev, ns, &warn_sync_abort); + ns = sanitize_state(mdev, ns, &ssw); if (ns.i == os.i) return SS_NOTHING_TO_DO; @@ -909,8 +936,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, return rv; } - if (warn_sync_abort) - dev_warn(DEV, "%s aborted.\n", warn_sync_abort); + print_sanitize_warnings(mdev, ssw); drbd_pr_state_change(mdev, os, ns, flags); diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 82db83410f0..f1046b13d9f 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -126,7 +126,7 @@ * is 1 PiB, currently. */ /* DRBD_MAX_SECTORS */ #define DRBD_DISK_SIZE_MIN 0 -#define DRBD_DISK_SIZE_MAX (16 * (2LLU << 30)) +#define DRBD_DISK_SIZE_MAX (1 * (2LLU << 40)) #define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */ #define DRBD_DISK_SIZE_SCALE 's' /* sectors */ -- cgit v1.2.3-70-g09d2 From cdfda633d235028e9b27381dedb65416409e8729 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 5 Jul 2011 15:38:59 +0200 Subject: drbd: detach from frozen backing device * drbd-8.3: documentation: Documented detach's --force and disk's --disk-timeout drbd: Implemented the disk-timeout option drbd: Force flag for the detach operation drbd: Allow new IOs while the local disk in in FAILED state drbd: Bitmap IO functions can not return prematurely if the disk breaks drbd: Added a kref to bm_aio_ctx drbd: Hold a reference to ldev while doing meta-data IO drbd: Keep a reference to the bio until the completion handler finished drbd: Implemented wait_until_done_or_disk_failure() drbd: Replaced md_io_mutex by an atomic: md_io_in_use drbd: moved md_io into mdev drbd: Immediately allow completion of IOs, that wait for IO completions on a failed disk drbd: Keep a reference to barrier acked requests Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 75 ++++++++++++++++++------ drivers/block/drbd/drbd_bitmap.c | 115 +++++++++++++++++++++++++++---------- drivers/block/drbd/drbd_int.h | 12 ++-- drivers/block/drbd/drbd_main.c | 77 ++++++++++++++++++++++--- drivers/block/drbd/drbd_nl.c | 28 ++++++++- drivers/block/drbd/drbd_receiver.c | 2 - drivers/block/drbd/drbd_req.c | 52 +++++++++++------ drivers/block/drbd/drbd_req.h | 19 +++--- drivers/block/drbd/drbd_state.c | 7 +++ drivers/block/drbd/drbd_worker.c | 9 ++- include/linux/drbd_genl.h | 9 ++- include/linux/drbd_limits.h | 6 ++ 12 files changed, 321 insertions(+), 90 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index aeb483daea0..58b5b61628f 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -114,18 +114,44 @@ struct drbd_atodb_wait { static int w_al_write_transaction(struct drbd_work *, int); +void *drbd_md_get_buffer(struct drbd_conf *mdev) +{ + int r; + + wait_event(mdev->misc_wait, + (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 || + mdev->state.disk <= D_FAILED); + + return r ? NULL : page_address(mdev->md_io_page); +} + +void drbd_md_put_buffer(struct drbd_conf *mdev) +{ + if (atomic_dec_and_test(&mdev->md_io_in_use)) + wake_up(&mdev->misc_wait); +} + +static bool md_io_allowed(struct drbd_conf *mdev) +{ + enum drbd_disk_state ds = mdev->state.disk; + return ds >= D_NEGOTIATING || ds == D_ATTACHING; +} + +void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done) +{ + wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev)); +} + static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, struct page *page, sector_t sector, int rw, int size) { struct bio *bio; - struct drbd_md_io md_io; int err; - md_io.mdev = mdev; - init_completion(&md_io.event); - md_io.error = 0; + mdev->md_io.done = 0; + mdev->md_io.error = -ENODEV; if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags)) rw |= REQ_FUA | REQ_FLUSH; @@ -137,17 +163,25 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, err = -EIO; if (bio_add_page(bio, page, size, 0) != size) goto out; - bio->bi_private = &md_io; + bio->bi_private = &mdev->md_io; bio->bi_end_io = drbd_md_io_complete; bio->bi_rw = rw; + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); + err = -ENODEV; + goto out; + } + + bio_get(bio); /* one bio_put() is in the completion handler */ + atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */ if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) bio_endio(bio, -EIO); else submit_bio(rw, bio); - wait_for_completion(&md_io.event); + wait_until_done_or_disk_failure(mdev, &mdev->md_io.done); if (bio_flagged(bio, BIO_UPTODATE)) - err = md_io.error; + err = mdev->md_io.error; out: bio_put(bio); @@ -160,7 +194,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int err; struct page *iop = mdev->md_io_page; - D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); + D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1); BUG_ON(!bdev->md_bdev); @@ -344,8 +378,14 @@ w_al_write_transaction(struct drbd_work *w, int unused) return 0; } - mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ - buffer = page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */ + if (!buffer) { + dev_err(DEV, "disk failed while waiting for md_io buffer\n"); + aw->err = -EIO; + complete(&((struct update_al_work *)w)->event); + put_ldev(mdev); + return 1; + } memset(buffer, 0, sizeof(*buffer)); buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); @@ -415,7 +455,7 @@ w_al_write_transaction(struct drbd_work *w, int unused) mdev->al_tr_number++; } - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); complete(&((struct update_al_work *)w)->event); put_ldev(mdev); @@ -506,8 +546,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) /* lock out all other meta data io for now, * and make sure the page is mapped. */ - mutex_lock(&mdev->md_io_mutex); - b = page_address(mdev->md_io_page); + b = drbd_md_get_buffer(mdev); + if (!b) + return 0; /* Always use the full ringbuffer space for now. * possible optimization: read in all of it, @@ -528,7 +569,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) /* IO error */ if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 0; } @@ -558,7 +599,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!found_valid) { if (found_initialized != mx) dev_warn(DEV, "No usable activity log found.\n"); - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 1; } @@ -573,7 +614,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!expect(rv != 0)) goto cancel; if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 0; } @@ -643,7 +684,7 @@ cancel: mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* ok, we are done with it */ - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", transactions, active_extents); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 52c48143b22..706e5220dd4 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -918,13 +918,22 @@ void drbd_bm_clear_all(struct drbd_conf *mdev) struct bm_aio_ctx { struct drbd_conf *mdev; atomic_t in_flight; - struct completion done; + unsigned int done; unsigned flags; #define BM_AIO_COPY_PAGES 1 #define BM_AIO_WRITE_HINTED 2 int error; + struct kref kref; }; +static void bm_aio_ctx_destroy(struct kref *kref) +{ + struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); + + put_ldev(ctx->mdev); + kfree(ctx); +} + /* bv_page may be a copy, or may be the original */ static void bm_async_io_complete(struct bio *bio, int error) { @@ -968,13 +977,16 @@ static void bm_async_io_complete(struct bio *bio, int error) bio_put(bio); - if (atomic_dec_and_test(&ctx->in_flight)) - complete(&ctx->done); + if (atomic_dec_and_test(&ctx->in_flight)) { + ctx->done = 1; + wake_up(&mdev->misc_wait); + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + } } static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) { - struct bio *bio = bio_alloc_drbd(GFP_KERNEL); + struct bio *bio = bio_alloc_drbd(GFP_NOIO); struct drbd_conf *mdev = ctx->mdev; struct drbd_bitmap *b = mdev->bitmap; struct page *page; @@ -1032,12 +1044,7 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must */ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) { - struct bm_aio_ctx ctx = { - .mdev = mdev, - .in_flight = ATOMIC_INIT(1), - .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), - .flags = flags, - }; + struct bm_aio_ctx *ctx; struct drbd_bitmap *b = mdev->bitmap; int num_pages, i, count = 0; unsigned long now; @@ -1052,7 +1059,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w * For lazy writeout, we don't care for ongoing changes to the bitmap, * as we submit copies of pages anyways. */ - if (!ctx.flags) + + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; + + *ctx = (struct bm_aio_ctx) { + .mdev = mdev, + .in_flight = ATOMIC_INIT(1), + .done = 0, + .flags = flags, + .error = 0, + .kref = { ATOMIC_INIT(2) }, + }; + + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); + err = -ENODEV; + goto out; + } + + if (!ctx->flags) WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); num_pages = b->bm_number_of_pages; @@ -1081,32 +1108,40 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w continue; } } - atomic_inc(&ctx.in_flight); - bm_page_io_async(&ctx, i, rw); + atomic_inc(&ctx->in_flight); + bm_page_io_async(ctx, i, rw); ++count; cond_resched(); } /* - * We initialize ctx.in_flight to one to make sure bm_async_io_complete - * will not complete() early, and decrement / test it here. If there + * We initialize ctx->in_flight to one to make sure bm_async_io_complete + * will not set ctx->done early, and decrement / test it here. If there * are still some bios in flight, we need to wait for them here. + * If all IO is done already (or nothing had been submitted), there is + * no need to wait. Still, we need to put the kref associated with the + * "in_flight reached zero, all done" event. */ - if (!atomic_dec_and_test(&ctx.in_flight)) - wait_for_completion(&ctx.done); + if (!atomic_dec_and_test(&ctx->in_flight)) + wait_until_done_or_disk_failure(mdev, &ctx->done); + else + kref_put(&ctx->kref, &bm_aio_ctx_destroy); /* summary for global bitmap IO */ if (flags == 0) dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", - rw == WRITE ? "WRITE" : "READ", - count, jiffies - now); + rw == WRITE ? "WRITE" : "READ", + count, jiffies - now); - if (ctx.error) { + if (ctx->error) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); drbd_chk_io_error(mdev, 1, true); - err = -EIO; /* ctx.error ? */ + err = -EIO; /* ctx->error ? */ } + if (atomic_read(&ctx->in_flight)) + err = -EIO; /* Disk failed during IO... */ + now = jiffies; if (rw == WRITE) { drbd_md_flush(mdev); @@ -1121,6 +1156,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); +out: + kref_put(&ctx->kref, &bm_aio_ctx_destroy); return err; } @@ -1177,28 +1214,46 @@ int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local) */ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) { - struct bm_aio_ctx ctx = { + struct bm_aio_ctx *ctx; + int err; + + if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { + dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); + return 0; + } + + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; + + *ctx = (struct bm_aio_ctx) { .mdev = mdev, .in_flight = ATOMIC_INIT(1), - .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), + .done = 0, .flags = BM_AIO_COPY_PAGES, + .error = 0, + .kref = { ATOMIC_INIT(2) }, }; - if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { - dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); - return 0; + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); + err = -ENODEV; + goto out; } - bm_page_io_async(&ctx, idx, WRITE_SYNC); - wait_for_completion(&ctx.done); + bm_page_io_async(ctx, idx, WRITE_SYNC); + wait_until_done_or_disk_failure(mdev, &ctx->done); - if (ctx.error) + if (ctx->error) drbd_chk_io_error(mdev, 1, true); /* that should force detach, so the in memory bitmap will be * gone in a moment as well. */ mdev->bm_writ_cnt++; - return ctx.error; + err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; + out: + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + return err; } /* NOTE diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6035784f0de..4e582058a7c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -780,8 +780,7 @@ struct drbd_backing_dev { }; struct drbd_md_io { - struct drbd_conf *mdev; - struct completion event; + unsigned int done; int error; }; @@ -852,6 +851,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_tl_epoch *newest_tle; struct drbd_tl_epoch *oldest_tle; struct list_head out_of_sequence_requests; + struct list_head barrier_acked_requests; struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_tfm; /* checksums we compute, updates protected by tconn->data->mutex */ @@ -978,7 +978,8 @@ struct drbd_conf { atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ - struct mutex md_io_mutex; /* protects the md_io_buffer */ + struct drbd_md_io md_io; + atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */ spinlock_t al_lock; wait_queue_head_t al_wait; struct lru_cache *act_log; /* activity log */ @@ -1424,9 +1425,12 @@ extern void resume_next_sg(struct drbd_conf *mdev); extern void suspend_other_sg(struct drbd_conf *mdev); extern int drbd_resync_finished(struct drbd_conf *mdev); /* maybe rather drbd_main.c ? */ +extern void *drbd_md_get_buffer(struct drbd_conf *mdev); +extern void drbd_md_put_buffer(struct drbd_conf *mdev); extern int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw); extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int); +extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done); extern void drbd_rs_controller_reset(struct drbd_conf *mdev); static inline void ov_out_of_sync_print(struct drbd_conf *mdev) @@ -2151,12 +2155,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) case D_OUTDATED: case D_CONSISTENT: case D_UP_TO_DATE: + case D_FAILED: /* disk state is stable as well. */ break; /* no new io accepted during transitional states */ case D_ATTACHING: - case D_FAILED: case D_NEGOTIATING: case D_UNKNOWN: case D_MASK: diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 448de7bf822..15384986e4a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -215,6 +215,7 @@ static int tl_init(struct drbd_tconn *tconn) tconn->oldest_tle = b; tconn->newest_tle = b; INIT_LIST_HEAD(&tconn->out_of_sequence_requests); + INIT_LIST_HEAD(&tconn->barrier_acked_requests); return 1; } @@ -315,7 +316,7 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, These have been list_move'd to the out_of_sequence_requests list in _req_mod(, BARRIER_ACKED) above. */ - list_del_init(&b->requests); + list_splice_init(&b->requests, &tconn->barrier_acked_requests); mdev = b->w.mdev; nob = b->next; @@ -417,8 +418,23 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) b = tmp; list_splice(&carry_reads, &b->requests); } -} + /* Actions operating on the disk state, also want to work on + requests that got barrier acked. */ + switch (what) { + case FAIL_FROZEN_DISK_IO: + case RESTART_FROZEN_DISK_IO: + list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + _req_mod(req, what); + } + case CONNECTION_LOST_WHILE_PENDING: + case RESEND: + break; + default: + conn_err(tconn, "what = %d in _tl_restart()\n", what); + } +} /** * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL @@ -467,6 +483,42 @@ void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) spin_unlock_irq(&tconn->req_lock); } +/** + * tl_apply() - Applies an event to all requests for a certain mdev in the TL + * @mdev: DRBD device. + * @what: The action/event to perform with all request objects + * + * @what might ony be ABORT_DISK_IO. + */ +void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what) +{ + struct drbd_tconn *tconn = mdev->tconn; + struct drbd_tl_epoch *b; + struct list_head *le, *tle; + struct drbd_request *req; + + D_ASSERT(what == ABORT_DISK_IO); + + spin_lock_irq(&tconn->req_lock); + b = tconn->oldest_tle; + while (b) { + list_for_each_safe(le, tle, &b->requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (req->w.mdev == mdev) + _req_mod(req, what); + } + b = b->next; + } + + list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (req->w.mdev == mdev) + _req_mod(req, what); + } + + spin_unlock_irq(&tconn->req_lock); +} + static int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; @@ -2003,8 +2055,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->rs_sect_in, 0); atomic_set(&mdev->rs_sect_ev, 0); atomic_set(&mdev->ap_in_flight, 0); + atomic_set(&mdev->md_io_in_use, 0); - mutex_init(&mdev->md_io_mutex); mutex_init(&mdev->own_state_mutex); mdev->state_mutex = &mdev->own_state_mutex; @@ -2282,6 +2334,8 @@ void drbd_minor_destroy(struct kref *kref) struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref); struct drbd_tconn *tconn = mdev->tconn; + del_timer_sync(&mdev->request_timer); + /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); D_ASSERT(list_empty(&mdev->tconn->data.work.q)); @@ -2868,8 +2922,10 @@ void drbd_md_sync(struct drbd_conf *mdev) if (!get_ldev_if_state(mdev, D_FAILED)) return; - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; + memset(buffer, 0, 512); buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); @@ -2900,7 +2956,8 @@ void drbd_md_sync(struct drbd_conf *mdev) * since we updated it on metadata. */ mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); +out: put_ldev(mdev); } @@ -2920,8 +2977,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!get_ldev_if_state(mdev, D_ATTACHING)) return ERR_IO_MD_DISK; - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { /* NOTE: can't do normal error processing here as this is @@ -2983,7 +3041,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF; err: - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); + out: put_ldev(mdev); return rv; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 97d1dab045d..bf8d0b07762 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1236,6 +1236,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) synchronize_rcu(); kfree(old_disk_conf); kfree(old_plan); + mod_timer(&mdev->request_timer, jiffies + HZ); goto success; fail_unlock: @@ -1628,6 +1629,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (rv < SS_SUCCESS) goto force_diskless_dec; + mod_timer(&mdev->request_timer, jiffies + HZ); + if (mdev->state.role == R_PRIMARY) mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1; else @@ -1667,10 +1670,17 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) return 0; } -static int adm_detach(struct drbd_conf *mdev) +static int adm_detach(struct drbd_conf *mdev, int force) { enum drbd_state_rv retcode; int ret; + + if (force) { + drbd_force_state(mdev, NS(disk, D_FAILED)); + retcode = SS_SUCCESS; + goto out; + } + drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); /* D_FAILED will transition to DISKLESS. */ @@ -1681,6 +1691,7 @@ static int adm_detach(struct drbd_conf *mdev) retcode = SS_NOTHING_TO_DO; if (ret) retcode = ERR_INTR; +out: return retcode; } @@ -1692,6 +1703,8 @@ static int adm_detach(struct drbd_conf *mdev) int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; + struct detach_parms parms = { }; + int err; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -1699,7 +1712,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - retcode = adm_detach(adm_ctx.mdev); + if (info->attrs[DRBD_NLA_DETACH_PARMS]) { + err = detach_parms_from_attrs(&parms, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + } + + retcode = adm_detach(adm_ctx.mdev, parms.force_detach); out: drbd_adm_finish(info, retcode); return 0; @@ -3116,7 +3138,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) /* detach */ idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { - retcode = adm_detach(mdev); + retcode = adm_detach(mdev, 0); if (retcode < SS_SUCCESS) { drbd_msg_put_info("failed to detach"); goto out; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7218750d293..3a7e54b8f41 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4366,8 +4366,6 @@ static int drbd_disconnected(struct drbd_conf *mdev) atomic_set(&mdev->rs_pending_cnt, 0); wake_up(&mdev->misc_wait); - del_timer(&mdev->request_timer); - del_timer_sync(&mdev->resync_timer); resync_timer_fn((unsigned long)mdev); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c4e4553f5c2..8fa51cda3b7 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -213,8 +213,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) { const unsigned long s = req->rq_state; struct drbd_conf *mdev = req->w.mdev; - /* only WRITES may end up here without a master bio (on barrier ack) */ - int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; + int rw = req->rq_state & RQ_WRITE ? WRITE : READ; /* we must not complete the master bio, while it is * still being processed by _drbd_send_zc_bio (drbd_send_dblock) @@ -225,7 +224,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) * the receiver, * the bio_endio completion callbacks. */ - if (s & RQ_LOCAL_PENDING) + if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) return; if (req->i.waiting) { /* Retry all conflicting peer requests. */ @@ -288,6 +287,9 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) req->master_bio = NULL; } + if (s & RQ_LOCAL_PENDING) + return; + if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { /* this is disconnected (local only) operation, * or protocol C P_WRITE_ACK, @@ -362,7 +364,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case COMPLETED_OK: - if (bio_data_dir(req->master_bio) == WRITE) + if (req->rq_state & RQ_WRITE) mdev->writ_cnt += req->i.size >> 9; else mdev->read_cnt += req->i.size >> 9; @@ -374,6 +376,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, put_ldev(mdev); break; + case ABORT_DISK_IO: + req->rq_state |= RQ_LOCAL_ABORTED; + if (req->rq_state & RQ_WRITE) + _req_may_be_done_not_susp(req, m); + else + goto goto_queue_for_net_read; + break; + case WRITE_COMPLETED_WITH_ERROR: req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; @@ -402,6 +412,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, __drbd_chk_io_error(mdev, false); put_ldev(mdev); + goto_queue_for_net_read: + /* no point in retrying if there is no good remote data, * or we have no connection. */ if (mdev->state.pdsk != D_UP_TO_DATE) { @@ -1071,14 +1083,21 @@ void request_timer_fn(unsigned long data) struct drbd_request *req; /* oldest request */ struct list_head *le; struct net_conf *nc; - unsigned long et; /* effective timeout = ko_count * timeout */ + unsigned long ent = 0, dt = 0, et; /* effective timeout = ko_count * timeout */ rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - et = nc ? nc->timeout * HZ/10 * nc->ko_count : 0; + ent = nc ? nc->timeout * HZ/10 * nc->ko_count : 0; + + if (get_ldev(mdev)) { + dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10; + put_ldev(mdev); + } rcu_read_unlock(); - if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) + et = min_not_zero(dt, ent); + + if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED)) return; /* Recurring timer stopped */ spin_lock_irq(&tconn->req_lock); @@ -1091,17 +1110,18 @@ void request_timer_fn(unsigned long data) le = le->prev; req = list_entry(le, struct drbd_request, tl_requests); - if (time_is_before_eq_jiffies(req->start_time + et)) { - if (req->rq_state & RQ_NET_PENDING) { + if (ent && req->rq_state & RQ_NET_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + ent)) { dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); - _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL); - } else { - dev_warn(DEV, "Local backing block device frozen?\n"); - mod_timer(&mdev->request_timer, jiffies + et); + _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); + } + } + if (dt && req->rq_state & RQ_LOCAL_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + dt)) { + dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); + __drbd_chk_io_error(mdev, 1); } - } else { - mod_timer(&mdev->request_timer, req->start_time + et); } - spin_unlock_irq(&tconn->req_lock); + mod_timer(&mdev->request_timer, req->start_time + et); } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 5135c95fbf8..f6aff150add 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -106,6 +106,7 @@ enum drbd_req_event { READ_COMPLETED_WITH_ERROR, READ_AHEAD_COMPLETED_WITH_ERROR, WRITE_COMPLETED_WITH_ERROR, + ABORT_DISK_IO, COMPLETED_OK, RESEND, FAIL_FROZEN_DISK_IO, @@ -119,18 +120,21 @@ enum drbd_req_event { * same time, so we should hold the request lock anyways. */ enum drbd_req_state_bits { - /* 210 - * 000: no local possible - * 001: to be submitted + /* 3210 + * 0000: no local possible + * 0001: to be submitted * UNUSED, we could map: 011: submitted, completion still pending - * 110: completed ok - * 010: completed with error + * 0110: completed ok + * 0010: completed with error + * 1001: Aborted (before completion) + * 1x10: Aborted and completed -> free */ __RQ_LOCAL_PENDING, __RQ_LOCAL_COMPLETED, __RQ_LOCAL_OK, + __RQ_LOCAL_ABORTED, - /* 76543 + /* 87654 * 00000: no network possible * 00001: to be send * 00011: to be send, on worker queue @@ -209,8 +213,9 @@ enum drbd_req_state_bits { #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) #define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) +#define RQ_LOCAL_ABORTED (1UL << __RQ_LOCAL_ABORTED) -#define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */ +#define RQ_LOCAL_MASK ((RQ_LOCAL_ABORTED << 1)-1) #define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) #define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 4c13a6f4f18..f51cefdbeff 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -29,6 +29,9 @@ #include "drbd_int.h" #include "drbd_req.h" +/* in drbd_main.c */ +extern void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what); + struct after_state_chg_work { struct drbd_work w; union drbd_state os; @@ -1315,6 +1318,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, rcu_read_unlock(); was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + /* Immediately allow completion of all application IO, that waits + for completion from the local disk. */ + tl_apply(mdev, ABORT_DISK_IO); + /* current state still has to be D_FAILED, * there is only one way out: to D_DISKLESS, * and that may only happen after our put_ldev below. */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 6410c55831e..dac8d9bc4be 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -67,11 +67,18 @@ rwlock_t global_state_lock; void drbd_md_io_complete(struct bio *bio, int error) { struct drbd_md_io *md_io; + struct drbd_conf *mdev; md_io = (struct drbd_md_io *)bio->bi_private; + mdev = container_of(md_io, struct drbd_conf, md_io); + md_io->error = error; - complete(&md_io->event); + md_io->done = 1; + wake_up(&mdev->misc_wait); + bio_put(bio); + drbd_md_put_buffer(mdev); + put_ldev(mdev); } /* reads on behalf of the partner, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index e879a932438..2e6cefefe5e 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -128,6 +128,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, @@ -224,6 +225,10 @@ GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, __flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect) ) +GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms, + __flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach) +) + /* * Notifications and commands (genlmsghdr->cmd) */ @@ -335,7 +340,9 @@ GENL_op( ) GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY)) + GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer), diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index f1046b13d9f..ddd332db2a5 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -50,6 +50,12 @@ #define DRBD_TIMEOUT_MAX 600 #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ + /* If backing disk takes longer than disk_timeout, mark the disk as failed */ +#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ +#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */ +#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */ +#define DRBD_DISK_TIMEOUT_SCALE '1' + /* active connection retries when C_WF_CONNECTION */ #define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MAX 120 -- cgit v1.2.3-70-g09d2 From 65d94927e036cd8e8e1406fa7fc387b4ae730159 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 13 Jul 2011 10:24:51 +0200 Subject: drbd: Changed some defaults * Enabled the resync controller, with a fill target of 50Kib. That gives reasonable resync speeds without tuning. A much better default than the 250KiB/s fixed. * Enable bitmap compression. It is save to use, and most people have more CPU power than network bandwidth. * ko-count of 7: Abort a connection if the peer fails to process a write request within 42 seconds. * al-extents of 1237: ~5 GiB seems to be a much more sane default these days. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index ddd332db2a5..defdebfecb7 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -16,7 +16,7 @@ #define DEBUG_RANGE_CHECK 0 #define DRBD_MINOR_COUNT_MIN 1 -#define DRBD_MINOR_COUNT_MAX 256 +#define DRBD_MINOR_COUNT_MAX (1U << 20) #define DRBD_MINOR_COUNT_DEF 32 #define DRBD_VOLUME_MAX 65535 @@ -99,7 +99,7 @@ * 200 should be more than enough even for very short timeouts */ #define DRBD_KO_COUNT_MIN 0 #define DRBD_KO_COUNT_MAX 200 -#define DRBD_KO_COUNT_DEF 0 +#define DRBD_KO_COUNT_DEF 7 /* } */ /* syncer { */ @@ -117,7 +117,7 @@ * 919 * 7 = 6433 */ #define DRBD_AL_EXTENTS_MIN 7 #define DRBD_AL_EXTENTS_MAX 6433 -#define DRBD_AL_EXTENTS_DEF 127 +#define DRBD_AL_EXTENTS_DEF 1237 #define DRBD_MINOR_NUMBER_MIN -1 #define DRBD_MINOR_NUMBER_MAX (1<<30) @@ -151,7 +151,7 @@ #define DRBD_C_PLAN_AHEAD_MIN 0 #define DRBD_C_PLAN_AHEAD_MAX 300 -#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */ +#define DRBD_C_PLAN_AHEAD_DEF 20 #define DRBD_C_DELAY_TARGET_MIN 1 #define DRBD_C_DELAY_TARGET_MAX 100 @@ -159,7 +159,7 @@ #define DRBD_C_FILL_TARGET_MIN 0 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ -#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */ +#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */ #define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ #define DRBD_C_MAX_RATE_MAX (4 << 20) @@ -167,7 +167,7 @@ #define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */ #define DRBD_C_MIN_RATE_MAX (4 << 20) -#define DRBD_C_MIN_RATE_DEF 4096 +#define DRBD_C_MIN_RATE_DEF 250 #define DRBD_CONG_FILL_MIN 0 #define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */ @@ -187,6 +187,6 @@ #define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 #define DRBD_ALWAYS_ASBP_DEF 0 -#define DRBD_USE_RLE_DEF 0 +#define DRBD_USE_RLE_DEF 1 #endif -- cgit v1.2.3-70-g09d2 From 32bdb64038ba3127245912dae2cc8a450bb1d705 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 9 May 2011 18:26:20 +0200 Subject: drbd: Define scale factors in a single place Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index defdebfecb7..cd3565cfed4 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -18,29 +18,35 @@ #define DRBD_MINOR_COUNT_MIN 1 #define DRBD_MINOR_COUNT_MAX (1U << 20) #define DRBD_MINOR_COUNT_DEF 32 +#define DRBD_MINOR_COUNT_SCALE '1' #define DRBD_VOLUME_MAX 65535 #define DRBD_DIALOG_REFRESH_MIN 0 #define DRBD_DIALOG_REFRESH_MAX 600 +#define DRBD_DIALOG_REFRESH_SCALE '1' /* valid port number */ #define DRBD_PORT_MIN 1 #define DRBD_PORT_MAX 0xffff +#define DRBD_PORT_SCALE '1' /* startup { */ /* if you want more than 3.4 days, disable */ #define DRBD_WFC_TIMEOUT_MIN 0 #define DRBD_WFC_TIMEOUT_MAX 300000 #define DRBD_WFC_TIMEOUT_DEF 0 +#define DRBD_WFC_TIMEOUT_SCALE '1' #define DRBD_DEGR_WFC_TIMEOUT_MIN 0 #define DRBD_DEGR_WFC_TIMEOUT_MAX 300000 #define DRBD_DEGR_WFC_TIMEOUT_DEF 0 +#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1' #define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0 #define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000 #define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0 +#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1' /* }*/ /* net { */ @@ -49,6 +55,7 @@ #define DRBD_TIMEOUT_MIN 1 #define DRBD_TIMEOUT_MAX 600 #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ +#define DRBD_TIMEOUT_SCALE '1' /* If backing disk takes longer than disk_timeout, mark the disk as failed */ #define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ @@ -60,46 +67,55 @@ #define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MAX 120 #define DRBD_CONNECT_INT_DEF 10 /* seconds */ +#define DRBD_CONNECT_INT_SCALE '1' /* keep-alive probes when idle */ #define DRBD_PING_INT_MIN 1 #define DRBD_PING_INT_MAX 120 #define DRBD_PING_INT_DEF 10 +#define DRBD_PING_INT_SCALE '1' /* timeout for the ping packets.*/ #define DRBD_PING_TIMEO_MIN 1 #define DRBD_PING_TIMEO_MAX 300 #define DRBD_PING_TIMEO_DEF 5 +#define DRBD_PING_TIMEO_SCALE '1' /* max number of write requests between write barriers */ #define DRBD_MAX_EPOCH_SIZE_MIN 1 #define DRBD_MAX_EPOCH_SIZE_MAX 20000 #define DRBD_MAX_EPOCH_SIZE_DEF 2048 +#define DRBD_MAX_EPOCH_SIZE_SCALE '1' /* I don't think that a tcp send buffer of more than 10M is useful */ #define DRBD_SNDBUF_SIZE_MIN 0 #define DRBD_SNDBUF_SIZE_MAX (10<<20) #define DRBD_SNDBUF_SIZE_DEF 0 +#define DRBD_SNDBUF_SIZE_SCALE '1' #define DRBD_RCVBUF_SIZE_MIN 0 #define DRBD_RCVBUF_SIZE_MAX (10<<20) #define DRBD_RCVBUF_SIZE_DEF 0 +#define DRBD_RCVBUF_SIZE_SCALE '1' /* @4k PageSize -> 128kB - 512MB */ #define DRBD_MAX_BUFFERS_MIN 32 #define DRBD_MAX_BUFFERS_MAX 131072 #define DRBD_MAX_BUFFERS_DEF 2048 +#define DRBD_MAX_BUFFERS_SCALE '1' /* @4k PageSize -> 4kB - 512MB */ #define DRBD_UNPLUG_WATERMARK_MIN 1 #define DRBD_UNPLUG_WATERMARK_MAX 131072 #define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16) +#define DRBD_UNPLUG_WATERMARK_SCALE '1' /* 0 is disabled. * 200 should be more than enough even for very short timeouts */ #define DRBD_KO_COUNT_MIN 0 #define DRBD_KO_COUNT_MAX 200 #define DRBD_KO_COUNT_DEF 7 +#define DRBD_KO_COUNT_SCALE '1' /* } */ /* syncer { */ @@ -118,6 +134,7 @@ #define DRBD_AL_EXTENTS_MIN 7 #define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 1237 +#define DRBD_AL_EXTENTS_SCALE '1' #define DRBD_MINOR_NUMBER_MIN -1 #define DRBD_MINOR_NUMBER_MAX (1<<30) @@ -148,34 +165,42 @@ #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 #define DRBD_MAX_BIO_BVECS_DEF 0 +#define DRBD_MAX_BIO_BVECS_SCALE '1' #define DRBD_C_PLAN_AHEAD_MIN 0 #define DRBD_C_PLAN_AHEAD_MAX 300 #define DRBD_C_PLAN_AHEAD_DEF 20 +#define DRBD_C_PLAN_AHEAD_SCALE '1' #define DRBD_C_DELAY_TARGET_MIN 1 #define DRBD_C_DELAY_TARGET_MAX 100 #define DRBD_C_DELAY_TARGET_DEF 10 +#define DRBD_C_DELAY_TARGET_SCALE '1' #define DRBD_C_FILL_TARGET_MIN 0 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ #define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */ +#define DRBD_C_FILL_TARGET_SCALE 's' /* sectors */ -#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ +#define DRBD_C_MAX_RATE_MIN 250 #define DRBD_C_MAX_RATE_MAX (4 << 20) #define DRBD_C_MAX_RATE_DEF 102400 +#define DRBD_C_MAX_RATE_SCALE 'k' /* kilobytes */ -#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */ +#define DRBD_C_MIN_RATE_MIN 0 #define DRBD_C_MIN_RATE_MAX (4 << 20) #define DRBD_C_MIN_RATE_DEF 250 +#define DRBD_C_MIN_RATE_SCALE 'k' /* kilobytes */ #define DRBD_CONG_FILL_MIN 0 #define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */ #define DRBD_CONG_FILL_DEF 0 +#define DRBD_CONG_FILL_SCALE 's' /* sectors */ #define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN #define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX #define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF +#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE #define DRBD_PROTOCOL_DEF DRBD_PROT_C -- cgit v1.2.3-70-g09d2 From 0317d9ecbc9bac43642b4aa70e3e1106f4fd26a1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 13 Jul 2011 13:40:30 +0200 Subject: drbd: Fix the maximum accepted minor device number The maximum minor device number allowed by the kernel is (1<<20 - 1). Reject device numbers higher than that to earlier catch possible errors. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/drbd_limits.h') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index cd3565cfed4..7d956e91ae7 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -137,7 +137,7 @@ #define DRBD_AL_EXTENTS_SCALE '1' #define DRBD_MINOR_NUMBER_MIN -1 -#define DRBD_MINOR_NUMBER_MAX (1<<30) +#define DRBD_MINOR_NUMBER_MAX ((1 << 20) - 1) #define DRBD_MINOR_NUMBER_DEF -1 #define DRBD_MINOR_NUMBER_SCALE '1' -- cgit v1.2.3-70-g09d2 From b80c043327ea4faac62a329a1d35f16c47a5128e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 18 Jul 2011 11:09:17 +0200 Subject: drbd: The minor_count module parameter is only a hint nowadays * The max of minor_count is 255 * In drbdadm count the number of minors, instead of finding the highest minor number * No longer us the magic in the init script Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/drbd_limits.h') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 7d956e91ae7..6d0a24331ed 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -16,7 +16,7 @@ #define DEBUG_RANGE_CHECK 0 #define DRBD_MINOR_COUNT_MIN 1 -#define DRBD_MINOR_COUNT_MAX (1U << 20) +#define DRBD_MINOR_COUNT_MAX 255 #define DRBD_MINOR_COUNT_DEF 32 #define DRBD_MINOR_COUNT_SCALE '1' -- cgit v1.2.3-70-g09d2 From 380207d08e7c4d1b19c0323777278992b4fbf9d6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 11 Nov 2011 12:31:20 +0100 Subject: drbd: Load balancing of read requests New config option for the disk secition "read-balancing", with the values: prefer-local, prefer-remote, round-robin, when-congested-remote. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_req.c | 57 +++++++++++++++++++++++++++++++++++++- include/linux/drbd.h | 8 ++++++ include/linux/drbd_genl.h | 1 + include/linux/drbd_limits.h | 1 + 6 files changed, 68 insertions(+), 2 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d397681fb7a..e2cccb40f5a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -698,6 +698,7 @@ enum { AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ B_RS_H_DONE, /* Before resync handler done (already executed) */ DISCARD_MY_DATA, /* discard_my_data flag per volume */ + READ_BALANCE_RR, }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e546dd3fab8..733b8bd663d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4974,7 +4974,7 @@ static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi) update_peer_seq(mdev, be32_to_cpu(p->seq_num)); - dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", + dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); return validate_req_change_req_state(mdev, p->block_id, sector, diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index ceb04a94aac..98251e2a7fb 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -563,6 +563,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); + if (!(req->rq_state & RQ_WRITE) && + mdev->state.disk == D_UP_TO_DATE && + !IS_ERR_OR_NULL(req->private_bio)) + goto goto_read_retry_local; + /* if it is still queued, we may not complete it here. * it will be canceled soon. */ if (!(req->rq_state & RQ_NET_QUEUED)) @@ -625,10 +630,22 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); req->rq_state |= RQ_NET_DONE; + + if (!(req->rq_state & RQ_WRITE) && + mdev->state.disk == D_UP_TO_DATE && + !IS_ERR_OR_NULL(req->private_bio)) + goto goto_read_retry_local; + _req_may_be_done_not_susp(req, m); /* else: done by HANDED_OVER_TO_NETWORK */ break; + goto_read_retry_local: + req->rq_state |= RQ_LOCAL_PENDING; + req->private_bio->bi_bdev = mdev->ldev->backing_bdev; + generic_make_request(req->private_bio); + break; + case FAIL_FROZEN_DISK_IO: if (!(req->rq_state & RQ_LOCAL_COMPLETED)) break; @@ -689,6 +706,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, dec_ap_pending(mdev); req->rq_state &= ~RQ_NET_PENDING; req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); + if (!IS_ERR_OR_NULL(req->private_bio)) { + bio_put(req->private_bio); + req->private_bio = NULL; + put_ldev(mdev); + } _req_may_be_done_not_susp(req, m); break; }; @@ -723,6 +745,35 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; } +static bool remote_due_to_read_balancing(struct drbd_conf *mdev) +{ + enum drbd_read_balancing rbm; + struct backing_dev_info *bdi; + + if (mdev->state.pdsk < D_UP_TO_DATE) + return false; + + rcu_read_lock(); + rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing; + rcu_read_unlock(); + + switch (rbm) { + case RB_CONGESTED_REMOTE: + bdi = &mdev->ldev->backing_bdev->bd_disk->queue->backing_dev_info; + return bdi_read_congested(bdi); + case RB_LEAST_PENDING: + return atomic_read(&mdev->local_cnt) > + atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt); + case RB_ROUND_ROBIN: + return test_and_change_bit(READ_BALANCE_RR, &mdev->flags); + case RB_PREFER_REMOTE: + return true; + case RB_PREFER_LOCAL: + default: + return false; + } +} + /* * complete_conflicting_writes - wait for any conflicting write requests * @@ -790,6 +841,10 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); + } else if (remote_due_to_read_balancing(mdev)) { + /* Keep the private bio in case we need it + for a local retry */ + local = 0; } } remote = !local && mdev->state.pdsk >= D_UP_TO_DATE; @@ -1017,7 +1072,7 @@ fail_free_complete: if (req->rq_state & RQ_IN_ACT_LOG) drbd_al_complete_io(mdev, &req->i); fail_and_free_req: - if (local) { + if (!IS_ERR_OR_NULL(req->private_bio)) { bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 1e9f754b66a..157ba3d74dc 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -102,6 +102,14 @@ enum drbd_on_congestion { OC_DISCONNECT, }; +enum drbd_read_balancing { + RB_PREFER_LOCAL, + RB_PREFER_REMOTE, + RB_ROUND_ROBIN, + RB_LEAST_PENDING, + RB_CONGESTED_REMOTE, +}; + /* KEEP the order, do not delete or insert. Only append. */ enum drbd_ret_code { ERR_CODE_BASE = 100, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 2e6cefefe5e..826008f297f 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -129,6 +129,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) + __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 6d0a24331ed..17ef66a5c11 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -161,6 +161,7 @@ #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR #define DRBD_ON_CONGESTION_DEF OC_BLOCK +#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 -- cgit v1.2.3-70-g09d2 From 9a51ab1c1b3c1e21f076cdd571bbe6ca7d1b504c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 20 Feb 2012 21:53:28 +0100 Subject: drbd: New disk option al-updates By disabling al-updates one might increase performace. The price for that is that in case a crashed primary (that had al-updates disabled) is reintegraded, it will receive a full-resync instead of a bitmap based resync. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 12 ++++++++++-- drivers/block/drbd/drbd_nl.c | 17 +++++++++++++++-- include/linux/drbd.h | 1 + include/linux/drbd_genl.h | 3 +++ include/linux/drbd_limits.h | 1 + 5 files changed, 30 insertions(+), 4 deletions(-) (limited to 'include/linux/drbd_limits.h') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 9eae2894431..83d48d210b6 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -276,8 +276,16 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) /* Double check: it may have been committed by someone else, * while we have been waiting for the lock. */ if (mdev->act_log->pending_changes) { - al_write_transaction(mdev); - mdev->al_writ_cnt++; + bool write_al_updates; + + rcu_read_lock(); + write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates; + rcu_read_unlock(); + + if (write_al_updates) { + al_write_transaction(mdev); + mdev->al_writ_cnt++; + } spin_lock_irq(&mdev->al_lock); /* FIXME diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index dc5bd6bbb28..c5d4fac1a11 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1230,6 +1230,11 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) mutex_unlock(&mdev->tconn->conf_update); + if (new_disk_conf->al_updates) + mdev->ldev->md.flags &= MDF_AL_DISABLED; + else + mdev->ldev->md.flags |= MDF_AL_DISABLED; + drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush); drbd_md_sync(mdev); @@ -1545,7 +1550,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } else if (dd == grew) set_bit(RESYNC_AFTER_NEG, &mdev->flags); - if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { + if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) || + (test_bit(CRASHED_PRIMARY, &mdev->flags) && + drbd_md_test_flag(mdev->ldev, MDF_AL_DISABLED))) { dev_info(DEV, "Assuming that all blocks are out of sync " "(aka FullSync)\n"); if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, @@ -1588,13 +1595,19 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (ns.disk == D_CONSISTENT && (ns.pdsk == D_OUTDATED || rcu_dereference(mdev->ldev->disk_conf)->fencing == FP_DONT_CARE)) ns.disk = D_UP_TO_DATE; - rcu_read_unlock(); /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND, MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before this point, because drbd_request_state() modifies these flags. */ + if (rcu_dereference(mdev->ldev->disk_conf)->al_updates) + mdev->ldev->md.flags &= MDF_AL_DISABLED; + else + mdev->ldev->md.flags |= MDF_AL_DISABLED; + + rcu_read_unlock(); + /* In case we are C_CONNECTED postpone any decision on the new disk state after the negotiation phase. */ if (mdev->state.conn == C_CONNECTED) { diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 1e86156c10f..36ae7dd28d9 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -338,6 +338,7 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv); #define MDF_PEER_OUT_DATED (1 << 5) #define MDF_CRASHED_PRIMARY (1 << 6) #define MDF_AL_CLEAN (1 << 7) +#define MDF_AL_DISABLED (1 << 8) enum drbd_uuid_index { UI_CURRENT, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 826008f297f..92ec4b50a88 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -130,6 +130,8 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) + /* 9: __u32_field_def(22, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */ + __flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, @@ -168,6 +170,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) + /* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */ ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 17ef66a5c11..1fa19c5f5e6 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -210,6 +210,7 @@ #define DRBD_DISK_DRAIN_DEF 1 #define DRBD_MD_FLUSHES_DEF 1 #define DRBD_TCP_CORK_DEF 1 +#define DRBD_AL_UPDATES_DEF 1 #define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 #define DRBD_ALWAYS_ASBP_DEF 0 -- cgit v1.2.3-70-g09d2