From 031fd3aa20fcf6d1862ea7814ee8b2caf36c0d78 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 6 Feb 2008 11:34:10 -0500 Subject: NLM: set RPC_CLNT_CREATE_NOPING for NLM RPC clients It's currently possible for an unresponsive NLM client to completely lock up a server's lockd. The scenario is something like this: 1) client1 (or a process on the server) takes a lock on a file 2) client2 tries to take a blocking lock on the same file and awaits the callback 3) client2 goes unresponsive (plug pulled, network partition, etc) 4) client1 releases the lock ...at that point the server's lockd will try to queue up a GRANT_MSG callback for client2, but first it requeues the block with a timeout of 30s. nlm_async_call will attempt to bind the RPC client to client2 and will call rpc_ping. rpc_ping entails a sync RPC call and if client2 is unresponsive it will take around 60s for that to time out. Once it times out, it's already time to retry the block and the whole process repeats. Once in this situation, nlmsvc_retry_blocked will never return until the host starts responding again. lockd won't service new calls. Fix this by skipping the RPC ping on NLM RPC clients. This makes nlm_async_call return quickly when called. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index ca6b16fc310..00063ee0b55 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -244,6 +244,7 @@ nlm_bind_host(struct nlm_host *host) .version = host->h_version, .authflavor = RPC_AUTH_UNIX, .flags = (RPC_CLNT_CREATE_HARDRTRY | + RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_AUTOBIND), }; -- cgit v1.2.3-70-g09d2 From 90bd17c87821fe0e055e0f9a7446c2875f31eb4c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 6 Feb 2008 11:34:11 -0500 Subject: NLM: have server-side RPC clients default to soft RPC tasks Now that it no longer does an RPC ping, lockd always ends up queueing an RPC task for the GRANT_MSG callback. But, it also requeues the block for later attempts. Since these are hard RPC tasks, if the client we're calling back goes unresponsive the GRANT_MSG callbacks can stack up in the RPC queue. Fix this by making server-side RPC clients default to soft RPC tasks. lockd requeues the block anyway, so this should be OK. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 00063ee0b55..f1ef49fff11 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -243,11 +243,18 @@ nlm_bind_host(struct nlm_host *host) .program = &nlm_program, .version = host->h_version, .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_HARDRTRY | - RPC_CLNT_CREATE_NOPING | + .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_AUTOBIND), }; + /* + * lockd retries server side blocks automatically so we want + * those to be soft RPC calls. Client side calls need to be + * hard RPC tasks. + */ + if (!host->h_server) + args.flags |= RPC_CLNT_CREATE_HARDRTRY; + clnt = rpc_create(&args); if (!IS_ERR(clnt)) host->h_rpcclnt = clnt; -- cgit v1.2.3-70-g09d2 From 9706501e43a80ce48b319214a0a9e562deded35b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 6 Feb 2008 11:34:12 -0500 Subject: NLM: don't reattempt GRANT_MSG when there is already an RPC in flight With the current scheme in nlmsvc_grant_blocked, we can end up with more than one GRANT_MSG callback for a block in flight. Right now, we requeue the block unconditionally so that a GRANT_MSG callback is done again in 30s. If the client is unresponsive, it can take more than 30s for the call already in flight to time out. There's no benefit to having more than one GRANT_MSG RPC queued up at a time, so put it on the list with a timeout of NLM_NEVER before doing the RPC call. If the RPC call submission fails, we requeue it with a short timeout. If it works, then nlmsvc_grant_callback will end up requeueing it with a shorter timeout after it completes. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/lockd/svclock.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 2f4d8fa6668..82db7b323b8 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -763,11 +763,20 @@ callback: dprintk("lockd: GRANTing blocked lock.\n"); block->b_granted = 1; - /* Schedule next grant callback in 30 seconds */ - nlmsvc_insert_block(block, 30 * HZ); + /* keep block on the list, but don't reattempt until the RPC + * completes or the submission fails + */ + nlmsvc_insert_block(block, NLM_NEVER); - /* Call the client */ - nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops); + /* Call the client -- use a soft RPC task since nlmsvc_retry_blocked + * will queue up a new one if this one times out + */ + error = nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, + &nlmsvc_grant_ops); + + /* RPC submission failed, wait a bit and retry */ + if (error < 0) + nlmsvc_insert_block(block, 10 * HZ); } /* -- cgit v1.2.3-70-g09d2 From c64e80d55db81df22a7f25b75ab4ba4c55db4749 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 6 Feb 2008 11:34:13 -0500 Subject: NLM: don't requeue block if it was invalidated while GRANT_MSG was in flight It's possible for lockd to catch a SIGKILL while a GRANT_MSG callback is in flight. If this happens we don't want lockd to insert the block back into the nlm_blocked list. This helps that situation, but there's still a possible race. Fixing that will mean adding real locking for nlm_blocked. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/lockd/svclock.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 82db7b323b8..fe9bdb4a220 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -795,6 +795,17 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) dprintk("lockd: GRANT_MSG RPC callback\n"); + /* if the block is not on a list at this point then it has + * been invalidated. Don't try to requeue it. + * + * FIXME: it's possible that the block is removed from the list + * after this check but before the nlmsvc_insert_block. In that + * case it will be added back. Perhaps we need better locking + * for nlm_blocked? + */ + if (list_empty(&block->b_list)) + return; + /* Technically, we should down the file semaphore here. Since we * move the block towards the head of the queue only, no harm * can be done, though. */ -- cgit v1.2.3-70-g09d2 From fbb7878c1a2ee40a1e983bf20f3dd3a80255dcf2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 7 Feb 2008 23:10:21 -0500 Subject: nfsd: clean up svc_reserve_auth() This is a void function attempting to return the return value from another void function, which seems harmless but extremely weird, and apparently makes some compilers complain. While we're there, clean up a little (e.g. the switch statement had a minor style problem and seemed overkill as long as there's only one case). Thanks to Trond for noticing this. Signed-off-by: J. Bruce Fields Cc: Trond Myklebust --- include/linux/sunrpc/svc.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 64c77105618..64c97552964 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -409,16 +409,13 @@ char * svc_print_addr(struct svc_rqst *, char *, size_t); * for all cases without actually generating the checksum, so we just use a * static value. */ -static inline void -svc_reserve_auth(struct svc_rqst *rqstp, int space) +static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) { - int added_space = 0; + int added_space = 0; - switch(rqstp->rq_authop->flavour) { - case RPC_AUTH_GSS: - added_space = RPC_MAX_AUTH_SIZE; - } - return svc_reserve(rqstp, space + added_space); + if (rqstp->rq_authop->flavour) + added_space = RPC_MAX_AUTH_SIZE; + svc_reserve(rqstp, space + added_space); } #endif /* SUNRPC_SVC_H */ -- cgit v1.2.3-70-g09d2 From bb50c8012cbd85b8e105584b32e4d5a2d335dcef Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 8 Feb 2008 16:02:04 -0800 Subject: SUNPRC: Fix printk format warning net/sunrpc/xprtrdma/svc_rdma_sendto.c:160: warning: format '%llx' expects type 'long long unsigned int', but argument 3 has type 'u64' Signed-off-by: Roland Dreier Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 3e321949e1d..0598b229c11 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -159,7 +159,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, BUG_ON(sge_count >= 32); dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " "write_len=%d, xdr_sge=%p, sge_count=%d\n", - rmr, to, xdr_off, write_len, xdr_sge, sge_count); + rmr, (unsigned long long)to, xdr_off, + write_len, xdr_sge, sge_count); ctxt = svc_rdma_get_context(xprt); ctxt->count = 0; -- cgit v1.2.3-70-g09d2