From aa91647c898d62e869fcf35e977ab3c533be8fc1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 May 2010 11:15:51 -0700 Subject: ceph: make mds requests killable, not interruptible The underlying problem is that many mds requests can't be restarted. For example, a restarted create() would return -EEXIST if the original request succeeds. However, we do not want a hung MDS to hang the client too. So, use the _killable wait_for_completion variants to abort on SIGKILL but nothing else. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ceph/mds_client.c') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 885aa5710cf..08413c8a85b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1768,12 +1768,12 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, mutex_unlock(&mdsc->mutex); dout("do_request waiting\n"); if (req->r_timeout) { - err = (long)wait_for_completion_interruptible_timeout( + err = (long)wait_for_completion_killable_timeout( &req->r_completion, req->r_timeout); if (err == 0) err = -EIO; } else { - err = wait_for_completion_interruptible(&req->r_completion); + err = wait_for_completion_killable(&req->r_completion); } dout("do_request waited, got %d\n", err); mutex_lock(&mdsc->mutex); -- cgit v1.2.3-70-g09d2 From dd1c9057366f329911180e9000e2b425f23fc287 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 May 2010 16:45:25 -0700 Subject: ceph: make lease code DN specific The lease code includes a mask in the CEPH_LOCK_* namespace, but that namespace is changing, and only one mask (formerly _DN == 1) is used, so hard code for that value for now. If we ever extend this code to handle leases over different data types we can extend it accordingly. Signed-off-by: Sage Weil --- fs/ceph/ceph_fs.h | 21 +++++++++++---------- fs/ceph/mds_client.c | 4 ++-- 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'fs/ceph/mds_client.c') diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 3b9eeed097b..2fa992eaf7d 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h @@ -265,16 +265,17 @@ extern const char *ceph_mds_state_name(int s); * - they also define the lock ordering by the MDS * - a few of these are internal to the mds */ -#define CEPH_LOCK_DN 1 -#define CEPH_LOCK_ISNAP 2 -#define CEPH_LOCK_IVERSION 4 /* mds internal */ -#define CEPH_LOCK_IFILE 8 /* mds internal */ -#define CEPH_LOCK_IAUTH 32 -#define CEPH_LOCK_ILINK 64 -#define CEPH_LOCK_IDFT 128 /* dir frag tree */ -#define CEPH_LOCK_INEST 256 /* mds internal */ -#define CEPH_LOCK_IXATTR 512 -#define CEPH_LOCK_INO 2048 /* immutable inode bits; not a lock */ +#define CEPH_LOCK_DVERSION 1 +#define CEPH_LOCK_DN 2 +#define CEPH_LOCK_ISNAP 16 +#define CEPH_LOCK_IVERSION 32 /* mds internal */ +#define CEPH_LOCK_IFILE 64 +#define CEPH_LOCK_IAUTH 128 +#define CEPH_LOCK_ILINK 256 +#define CEPH_LOCK_IDFT 512 /* dir frag tree */ +#define CEPH_LOCK_INEST 1024 /* mds internal */ +#define CEPH_LOCK_IXATTR 2048 +#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ /* client_session ops */ enum { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 08413c8a85b..5a88b7bb379 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2541,7 +2541,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, return; lease = msg->front.iov_base; lease->action = action; - lease->mask = cpu_to_le16(CEPH_LOCK_DN); + lease->mask = cpu_to_le16(1); lease->ino = cpu_to_le64(ceph_vino(inode).ino); lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap); lease->seq = cpu_to_le32(seq); @@ -2571,7 +2571,7 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, BUG_ON(inode == NULL); BUG_ON(dentry == NULL); - BUG_ON(mask != CEPH_LOCK_DN); + BUG_ON(mask == 0); /* is dentry lease valid? */ spin_lock(&dentry->d_lock); -- cgit v1.2.3-70-g09d2 From 2a8e5e3637e2fc058798f5d3626f525729ffaaaf Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 28 May 2010 16:43:16 -0700 Subject: ceph: clean up on forwarded aborted mds request If an mds request is aborted (timeout, SIGKILL), it is left registered to keep our state in sync with the mds. If we get a forward notification, though, we know the request didn't succeed and we can unregister it safely. We were trying to resend it, but then bailing out (and not unregistering) in __do_request. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/ceph/mds_client.c') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 5a88b7bb379..b49f12822cb 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2014,16 +2014,21 @@ static void handle_forward(struct ceph_mds_client *mdsc, mutex_lock(&mdsc->mutex); req = __lookup_request(mdsc, tid); if (!req) { - dout("forward %llu to mds%d - req dne\n", tid, next_mds); + dout("forward tid %llu to mds%d - req dne\n", tid, next_mds); goto out; /* dup reply? */ } - if (fwd_seq <= req->r_num_fwd) { - dout("forward %llu to mds%d - old seq %d <= %d\n", + if (req->r_aborted) { + dout("forward tid %llu aborted, unregistering\n", tid); + __unregister_request(mdsc, req); + } else if (fwd_seq <= req->r_num_fwd) { + dout("forward tid %llu to mds%d - old seq %d <= %d\n", tid, next_mds, req->r_num_fwd, fwd_seq); } else { /* resend. forward race not possible; mds would drop */ - dout("forward %llu to mds%d (we resend)\n", tid, next_mds); + dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); + BUG_ON(req->r_err); + BUG_ON(req->r_got_result); req->r_num_fwd = fwd_seq; req->r_resend_mds = next_mds; put_request_session(req); -- cgit v1.2.3-70-g09d2