From 8fe39aac0578cbb0abf27e1be70ff581e0c1d836 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 22 Nov 2013 13:22:13 +0100 Subject: drbd: device->ldev is not guaranteed on an D_ATTACHING disk Some parts of the code assumed that get_ldev_if_state(device, D_ATTACHING) is sufficient to access the ldev member of the device object. That was wrong. ldev may not be there or might be freed at any time if the device has a disk state of D_ATTACHING. bm_rw() Documented that drbd_bm_read() is only called from drbd_adm_attach. drbd_bm_write() is only called when a reference is held, and it is documented that a caller has to hold a reference before calling drbd_bm_write() drbd_bm_write_page() Use get_ldev() instead of get_ldev_if_state(device, D_ATTACHING) drbd_bmio_set_n_write() No longer use get_ldev_if_state(device, D_ATTACHING). All callers hold a reference to ldev now. drbd_bmio_clear_n_write() All callers where holding a reference of ldev anyways. Remove the misleading get_ldev_if_state(device, D_ATTACHING) drbd_reconsider_max_bio_size() Removed the get_ldev_if_state(device, D_ATTACHING). All callers now pass a struct drbd_backing_dev* when they have a proper reference, or a NULL pointer. Before this fix, the receiver could trigger a NULL pointer deref when in drbd_reconsider_max_bio_size() drbd_bump_write_ordering() Used get_ldev_if_state(device, D_ATTACHING) with the wrong assumption. Remove it, and allow the caller to pass in a struct drbd_backing_dev* when the caller knows that accessing this bdev is safe. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 3dbe9bd57a0..20ec8903b1e 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -245,7 +245,7 @@ enum drbd_disk_state { D_DISKLESS, D_ATTACHING, /* In the process of reading the meta-data */ D_FAILED, /* Becomes D_DISKLESS as soon as we told it the peer */ - /* when >= D_FAILED it is legal to access mdev->bc */ + /* when >= D_FAILED it is legal to access mdev->ldev */ D_NEGOTIATING, /* Late attaching state, we need to talk to the peer */ D_INCONSISTENT, D_OUTDATED, -- cgit v1.2.3-70-g09d2 From aaaba34576407857f6146ff6c330f06e63fb2bf2 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 18 Mar 2014 12:30:09 +0100 Subject: drbd: implement csums-after-crash-only Checksum based resync trades CPU cycles for network bandwidth, in situations where we expect much of the to-be-resynced blocks to be actually identical on both sides already. In a "network hickup" scenario, it won't help: all to-be-resynced blocks will typically be different. The use case is for the resync of *potentially* different blocks after crash recovery -- the crash recovery had marked larger areas (those covered by the activity log) as need-to-be-resynced, just in case. Most of those blocks will be identical. This option makes it possible to configure checksum based resync, but only actually use it for the first resync after primary crash. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 ++ drivers/block/drbd/drbd_receiver.c | 2 ++ drivers/block/drbd/drbd_worker.c | 24 ++++++++++++++++++++---- include/linux/drbd_genl.h | 3 +++ include/linux/drbd_limits.h | 1 + 5 files changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index abf5aefd979..fe6595a96a9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -738,6 +738,8 @@ struct drbd_device { struct rb_root read_requests; struct rb_root write_requests; + /* use checksums for *this* resync */ + bool use_csums; /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ unsigned long rs_total; /* number of resync blocks that failed in this run */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5626c5babc3..d326af67c27 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2555,6 +2555,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet peer_req->w.cb = w_e_end_csum_rs_req; /* used in the sector offset progress display */ device->bm_resync_fo = BM_SECT_TO_BIT(sector); + /* remember to report stats in drbd_resync_finished */ + device->use_csums = true; } else if (pi->cmd == P_OV_REPLY) { /* track progress, we may need to throttle */ atomic_add(size >> 9, &device->rs_sect_in); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 2ff5fd49a3b..6532a697cf4 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -698,8 +698,8 @@ next_sector: /* adjust very last sectors, in case we are oddly sized */ if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - if (connection->agreed_pro_version >= 89 && - connection->csums_tfm) { + + if (device->use_csums) { switch (read_for_csum(peer_device, sector, size)) { case -EIO: /* Disk failure */ put_ldev(device); @@ -913,7 +913,7 @@ int drbd_resync_finished(struct drbd_device *device) if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) khelper_cmd = "after-resync-target"; - if (first_peer_device(device)->connection->csums_tfm && device->rs_total) { + if (device->use_csums && device->rs_total) { const unsigned long s = device->rs_same_csum; const unsigned long t = device->rs_total; const int ratio = @@ -1622,6 +1622,18 @@ static void do_start_resync(struct drbd_device *device) clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags); } +static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device) +{ + bool csums_after_crash_only; + rcu_read_lock(); + csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only; + rcu_read_unlock(); + return connection->agreed_pro_version >= 89 && /* supported? */ + connection->csums_tfm && /* configured? */ + (csums_after_crash_only == 0 /* use for each resync? */ + || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */ +} + /** * drbd_start_resync() - Start the resync process * @device: DRBD device. @@ -1756,8 +1768,12 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) drbd_conn_str(ns.conn), (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10), (unsigned long) device->rs_total); - if (side == C_SYNC_TARGET) + if (side == C_SYNC_TARGET) { device->bm_resync_fo = 0; + device->use_csums = use_checksum_based_resync(connection, device); + } else { + device->use_csums = 0; + } /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid * with w_send_oos, or the sync target will get confused as to diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 4193f5f2636..71fc924c53f 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -171,6 +171,9 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) /* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */ + /* 9: __str_field_def(31, DRBD_GENLA_F_MANDATORY, name, SHARED_SECRET_MAX) */ + /* 9: __u32_field(32, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_node_id) */ + __flg_field_def(33, 0 /* OPTIONAL */, csums_after_crash_only, DRBD_CSUMS_AFTER_CRASH_ONLY_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 17e50bb0052..9d2df1d5141 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -214,6 +214,7 @@ #define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 #define DRBD_ALWAYS_ASBP_DEF 0 #define DRBD_USE_RLE_DEF 1 +#define DRBD_CSUMS_AFTER_CRASH_ONLY_DEF 0 #define DRBD_AL_STRIPES_MIN 1 #define DRBD_AL_STRIPES_MAX 1024 -- cgit v1.2.3-70-g09d2 From 5d0b17f1a29e8189d04aef447a3a53cfd05529b2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 18 Mar 2014 14:24:35 +0100 Subject: drbd: New net configuration option socket-check-timeout In setups involving a DRBD-proxy and connections that experience a lot of buffer-bloat it might be necessary to set ping-timeout to an unusual high value. By default DRBD uses the same value to wait if a newly established TCP-connection is stable. Since the DRBD-proxy is usually located in the same data center such a long wait time may hinder DRBD's connect process. In such setups socket-check-timeout should be set to at least to the round trip time between DRBD and DRBD-proxy. I.e. in most cases to 1. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 46 +++++++++++++++++++++++++------------- include/linux/drbd_genl.h | 1 + include/linux/drbd_limits.h | 5 +++++ 3 files changed, 36 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7da83f3a61e..b89e6fb468c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -819,7 +819,7 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke * drbd_socket_okay() - Free the socket if its connection is not okay * @sock: pointer to the pointer to the socket. */ -static int drbd_socket_okay(struct socket **sock) +static bool drbd_socket_okay(struct socket **sock) { int rr; char tb[4]; @@ -837,6 +837,30 @@ static int drbd_socket_okay(struct socket **sock) return false; } } + +static bool connection_established(struct drbd_connection *connection, + struct socket **sock1, + struct socket **sock2) +{ + struct net_conf *nc; + int timeout; + bool ok; + + if (!*sock1 || !*sock2) + return false; + + rcu_read_lock(); + nc = rcu_dereference(connection->net_conf); + timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10; + rcu_read_unlock(); + schedule_timeout_interruptible(timeout); + + ok = drbd_socket_okay(sock1); + ok = drbd_socket_okay(sock2) && ok; + + return ok; +} + /* Gets called if a connection is established, or if a new minor gets created in a connection */ int drbd_connected(struct drbd_peer_device *peer_device) @@ -878,8 +902,8 @@ static int conn_connect(struct drbd_connection *connection) struct drbd_socket sock, msock; struct drbd_peer_device *peer_device; struct net_conf *nc; - int vnr, timeout, h, ok; - bool discard_my_data; + int vnr, timeout, h; + bool discard_my_data, ok; enum drbd_state_rv rv; struct accept_wait_data ad = { .connection = connection, @@ -923,17 +947,8 @@ static int conn_connect(struct drbd_connection *connection) } } - if (sock.socket && msock.socket) { - rcu_read_lock(); - nc = rcu_dereference(connection->net_conf); - timeout = nc->ping_timeo * HZ / 10; - rcu_read_unlock(); - schedule_timeout_interruptible(timeout); - ok = drbd_socket_okay(&sock.socket); - ok = drbd_socket_okay(&msock.socket) && ok; - if (ok) - break; - } + if (connection_established(connection, &sock.socket, &msock.socket)) + break; retry: s = drbd_wait_for_connect(connection, &ad); @@ -979,8 +994,7 @@ randomize: goto out_release_sockets; } - ok = drbd_socket_okay(&sock.socket); - ok = drbd_socket_okay(&msock.socket) && ok; + ok = connection_established(connection, &sock.socket, &msock.socket); } while (!ok); if (ad.s_listen) diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 71fc924c53f..7b131ed8f9c 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -174,6 +174,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, /* 9: __str_field_def(31, DRBD_GENLA_F_MANDATORY, name, SHARED_SECRET_MAX) */ /* 9: __u32_field(32, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_node_id) */ __flg_field_def(33, 0 /* OPTIONAL */, csums_after_crash_only, DRBD_CSUMS_AFTER_CRASH_ONLY_DEF) + __u32_field_def(34, 0 /* OPTIONAL */, sock_check_timeo, DRBD_SOCKET_CHECK_TIMEO_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 9d2df1d5141..8ac8c5d9a3a 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -225,4 +225,9 @@ #define DRBD_AL_STRIPE_SIZE_MAX 16777216 #define DRBD_AL_STRIPE_SIZE_DEF 32 #define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */ + +#define DRBD_SOCKET_CHECK_TIMEO_MIN 0 +#define DRBD_SOCKET_CHECK_TIMEO_MAX DRBD_PING_TIMEO_MAX +#define DRBD_SOCKET_CHECK_TIMEO_DEF 0 +#define DRBD_SOCKET_CHECK_TIMEO_SCALE '1' #endif -- cgit v1.2.3-70-g09d2 From bf0d6e4a1138e71cafdbbb99cde430eee50c4ff1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 6 May 2014 14:28:32 +0300 Subject: drbd: silence underflow warning in read_in_block() My static checker warns that "data_size" could be negative and underflow the limit check. The code looks suspicious but I don't know if it is a real bug. Signed-off-by: Dan Carpenter Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index f972988291c..9342b8da73a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1592,7 +1592,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, struct drbd_peer_request *peer_req; struct page *page; int dgs, ds, err; - int data_size = pi->size; + unsigned int data_size = pi->size; void *dig_in = peer_device->connection->int_dig_in; void *dig_vv = peer_device->connection->int_dig_vv; unsigned long *data; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 20ec8903b1e..debb70d4054 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -52,7 +52,7 @@ #endif extern const char *drbd_buildtag(void); -#define REL_VERSION "8.4.3" +#define REL_VERSION "8.4.5" #define API_VERSION 1 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 101 -- cgit v1.2.3-70-g09d2