From 2ea24497a1b30dd03dd42b873fa5097913587f4d Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Mon, 10 Feb 2014 11:18:39 -0500
Subject: SUNRPC: RPC callbacks may be split across several TCP segments

Since TCP is a stream protocol, our callback read code needs to take
into account the fact that RPC callbacks are not always confined to a
single TCP segment.

This patch adds support for multiple TCP segments by ensuring that we
only remove the rpc_rqst structure from the 'free backchannel requests'
list once the data has been completely received. We rely on the fact
that TCP data is ordered for the duration of the connection.

Reported-by: shaobingqing
Signed-off-by: Trond Myklebust
---
 net/sunrpc/backchannel_rqst.c | 93 +++++++++++++++++++++++++++++--------------
 net/sunrpc/xprtsock.c         | 28 ++++---------
 2 files changed, 72 insertions(+), 49 deletions(-)

diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index e860d4f7ed2..3513d559bc4 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -212,39 +212,23 @@ out:
 }
 EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
 
-/*
- * One or more rpc_rqst structure have been preallocated during the
- * backchannel setup.  Buffer space for the send and private XDR buffers
- * has been preallocated as well.  Use xprt_alloc_bc_request to allocate
- * to this request.  Use xprt_free_bc_request to return it.
- *
- * We know that we're called in soft interrupt context, grab the spin_lock
- * since there is no need to grab the bottom half spin_lock.
- *
- * Return an available rpc_rqst, otherwise NULL if non are available.
- */
-struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt)
+static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
 {
-        struct rpc_rqst *req;
+        struct rpc_rqst *req = NULL;
 
         dprintk("RPC: allocate a backchannel request\n");
-        spin_lock(&xprt->bc_pa_lock);
-        if (!list_empty(&xprt->bc_pa_list)) {
-                req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
-                                rq_bc_pa_list);
-                list_del(&req->rq_bc_pa_list);
-        } else {
-                req = NULL;
-        }
-        spin_unlock(&xprt->bc_pa_lock);
+        if (list_empty(&xprt->bc_pa_list))
+                goto not_found;
 
-        if (req != NULL) {
-                set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
-                req->rq_reply_bytes_recvd = 0;
-                req->rq_bytes_sent = 0;
-                memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
+        req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
+                        rq_bc_pa_list);
+        req->rq_reply_bytes_recvd = 0;
+        req->rq_bytes_sent = 0;
+        memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
                         sizeof(req->rq_private_buf));
-        }
+        req->rq_xid = xid;
+        req->rq_connect_cookie = xprt->connect_cookie;
+not_found:
         dprintk("RPC: backchannel req=%p\n", req);
         return req;
 }
@@ -259,6 +243,7 @@ void xprt_free_bc_request(struct rpc_rqst *req)
 
         dprintk("RPC: free backchannel req=%p\n", req);
 
+        req->rq_connect_cookie = xprt->connect_cookie - 1;
         smp_mb__before_clear_bit();
         WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
         clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
@@ -281,7 +266,57 @@ void xprt_free_bc_request(struct rpc_rqst *req)
          * may be reused by a new callback request.
          */
         spin_lock_bh(&xprt->bc_pa_lock);
-        list_add(&req->rq_bc_pa_list, &xprt->bc_pa_list);
+        list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
         spin_unlock_bh(&xprt->bc_pa_lock);
 }
 
+/*
+ * One or more rpc_rqst structures have been preallocated during the
+ * backchannel setup.  Buffer space for the send and private XDR buffers
+ * has been preallocated as well.
+ * Use xprt_alloc_bc_request to allocate
+ * to this request.  Use xprt_free_bc_request to return it.
+ *
+ * We know that we're called in soft interrupt context, grab the spin_lock
+ * since there is no need to grab the bottom half spin_lock.
+ *
+ * Return an available rpc_rqst, otherwise NULL if none are available.
+ */
+struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid)
+{
+        struct rpc_rqst *req;
+
+        spin_lock(&xprt->bc_pa_lock);
+        list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) {
+                if (req->rq_connect_cookie != xprt->connect_cookie)
+                        continue;
+                if (req->rq_xid == xid)
+                        goto found;
+        }
+        req = xprt_alloc_bc_request(xprt, xid);
+found:
+        spin_unlock(&xprt->bc_pa_lock);
+        return req;
+}
+
+/*
+ * Add callback request to callback list. The callback
+ * service sleeps on the sv_cb_waitq waiting for new
+ * requests. Wake it up after enqueueing the
+ * request.
+ */
+void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
+{
+        struct rpc_xprt *xprt = req->rq_xprt;
+        struct svc_serv *bc_serv = xprt->bc_serv;
+
+        req->rq_private_buf.len = copied;
+        set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
+
+        dprintk("RPC: add callback request to list\n");
+        spin_lock(&bc_serv->sv_cb_lock);
+        list_del(&req->rq_bc_pa_list);
+        list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
+        wake_up(&bc_serv->sv_cb_waitq);
+        spin_unlock(&bc_serv->sv_cb_lock);
+}
+
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0addefca8e7..966763d735e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1306,41 +1306,29 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
  * If we're unable to obtain the rpc_rqst we schedule the closing of the
  * connection and return -1.
  */
-static inline int xs_tcp_read_callback(struct rpc_xprt *xprt,
+static int xs_tcp_read_callback(struct rpc_xprt *xprt,
                                        struct xdr_skb_reader *desc)
 {
         struct sock_xprt *transport =
                                 container_of(xprt, struct sock_xprt, xprt);
         struct rpc_rqst *req;
 
-        req = xprt_alloc_bc_request(xprt);
+        /* Look up and lock the request corresponding to the given XID */
+        spin_lock(&xprt->transport_lock);
+        req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
         if (req == NULL) {
+                spin_unlock(&xprt->transport_lock);
                 printk(KERN_WARNING "Callback slot table overflowed\n");
                 xprt_force_disconnect(xprt);
                 return -1;
         }
 
-        req->rq_xid = transport->tcp_xid;
         dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid));
         xs_tcp_read_common(xprt, desc, req);
 
-        if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) {
-                struct svc_serv *bc_serv = xprt->bc_serv;
-
-                /*
-                 * Add callback request to callback list. The callback
-                 * service sleeps on the sv_cb_waitq waiting for new
-                 * requests. Wake it up after adding enqueing the
-                 * request.
-                 */
-                dprintk("RPC: add callback request to list\n");
-                spin_lock(&bc_serv->sv_cb_lock);
-                list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
-                spin_unlock(&bc_serv->sv_cb_lock);
-                wake_up(&bc_serv->sv_cb_waitq);
-        }
-
-        req->rq_private_buf.len = transport->tcp_copied;
+        if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
+                xprt_complete_bc_request(req, transport->tcp_copied);
+        spin_unlock(&xprt->transport_lock);
 
         return 0;
 }
--
cgit v1.2.3-70-g09d2


From 485f2251782f7c44299c491d4676a8a01428d191 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Mon, 17 Mar 2014 12:51:44 -0400
Subject: SUNRPC: Ensure that call_connect times out correctly

When the server is unavailable due to a networking error, etc, we want
the RPC client to respect the timeout delays when attempting to
reconnect.
Reported-by: Neil Brown
Fixes: 561ec1603171 (SUNRPC: call_connect_status should recheck bind..)
Signed-off-by: Trond Myklebust
---
 net/sunrpc/clnt.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0edada97343..5a1b8fa9ca1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1798,10 +1798,6 @@ call_connect_status(struct rpc_task *task)
         trace_rpc_connect_status(task, status);
         task->tk_status = 0;
         switch (status) {
-                /* if soft mounted, test if we've timed out */
-        case -ETIMEDOUT:
-                task->tk_action = call_timeout;
-                return;
         case -ECONNREFUSED:
         case -ECONNRESET:
         case -ECONNABORTED:
@@ -1812,7 +1808,9 @@ call_connect_status(struct rpc_task *task)
                 if (RPC_IS_SOFTCONN(task))
                         break;
         case -EAGAIN:
-                task->tk_action = call_bind;
+                /* Check for timeouts before looping back to call_bind */
+        case -ETIMEDOUT:
+                task->tk_action = call_timeout;
                 return;
         case 0:
                 clnt->cl_stats->netreconn++;
--
cgit v1.2.3-70-g09d2


From fdb63dcdb53a3c6dc11d4e438ef2425ec962d1e9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Mon, 17 Mar 2014 12:57:31 -0400
Subject: SUNRPC: Ensure that call_bind times out correctly

If the rpcbind server is unavailable, we still want the RPC client to
respect the timeout.

Signed-off-by: Trond Myklebust
---
 net/sunrpc/clnt.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 5a1b8fa9ca1..cea1308a6fd 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1728,9 +1728,7 @@ call_bind_status(struct rpc_task *task)
         case -EPROTONOSUPPORT:
                 dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n",
                                 task->tk_pid);
-                task->tk_status = 0;
-                task->tk_action = call_bind;
-                return;
+                goto retry_timeout;
         case -ECONNREFUSED:             /* connection problems */
         case -ECONNRESET:
         case -ECONNABORTED:
@@ -1756,6 +1754,7 @@ call_bind_status(struct rpc_task *task)
                 return;
 
 retry_timeout:
+        task->tk_status = 0;
         task->tk_action = call_timeout;
 }
--
cgit v1.2.3-70-g09d2


From 2b7bbc963da8d076f263574af4138b5df2e1581f Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Wed, 12 Mar 2014 12:51:30 -0400
Subject: SUNRPC: Fix large reads on NFS/RDMA

After commit a11a2bf4, "SUNRPC: Optimise away unnecessary data moves
in xdr_align_pages", Thu Aug 2 13:21:43 2012, READs larger than a few
hundred bytes via NFS/RDMA no longer work. This commit exposed a
long-standing bug in rpcrdma_inline_fixup().

I reproduce this with an rsize=4096 mount using the cthon04 basic
tests. Test 5 fails with an EIO error.

For my reproducer, kernel log shows:

  NFS: server cheating in read reply: count 4096 > recvd 0

rpcrdma_inline_fixup() is zeroing the xdr_stream::page_len field,
and xdr_align_pages() is now returning that value to the READ XDR
decoder function.

That field is set up by xdr_inline_pages() by the READ XDR encoder
function. As far as I can tell, it is supposed to be left alone
after that, as it describes the dimensions of the reply xdr_stream,
not the contents of that stream.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=68391
Signed-off-by: Chuck Lever
Signed-off-by: Trond Myklebust
---
 net/sunrpc/xprtrdma/rpc_rdma.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index e03725bfe2b..96ead526b12 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -649,9 +649,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
                                 break;
                         page_base = 0;
                 }
-                rqst->rq_rcv_buf.page_len = olen - copy_len;
-        } else
-                rqst->rq_rcv_buf.page_len = 0;
+        }
 
         if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) {
                 curlen = copy_len;
--
cgit v1.2.3-70-g09d2


From 3a0799a94c0384a3b275a73267aaa10517b1bf7d Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Wed, 12 Mar 2014 12:51:39 -0400
Subject: SUNRPC: remove KERN_INFO from dprintk() call sites

The use of KERN_INFO causes garbage characters to appear when
debugging is enabled.

Signed-off-by: Chuck Lever
Signed-off-by: Trond Myklebust
---
 net/sunrpc/xprtrdma/transport.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 285dc088411..1eb9c468d0c 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -733,7 +733,7 @@ static void __exit xprt_rdma_cleanup(void)
 {
         int rc;
 
-        dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n");
+        dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
 #ifdef RPC_DEBUG
         if (sunrpc_table_header) {
                 unregister_sysctl_table(sunrpc_table_header);
@@ -755,14 +755,14 @@ static int __init xprt_rdma_init(void)
         if (rc)
                 return rc;
 
-        dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n");
+        dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
 
-        dprintk(KERN_INFO "Defaults:\n");
-        dprintk(KERN_INFO "\tSlots %d\n"
+        dprintk("Defaults:\n");
+        dprintk("\tSlots %d\n"
                 "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
                 xprt_rdma_slot_table_entries,
                 xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
-        dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n",
+        dprintk("\tPadding %d\n\tMemreg %d\n",
                 xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
 
 #ifdef RPC_DEBUG
--
cgit v1.2.3-70-g09d2


From 9455e3f43b017f560daf4289d0fa295a33976f2a Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Wed, 19 Mar 2014 13:25:43 -0400
Subject: SUNRPC: Ensure call_status() deals correctly with SOFTCONN tasks

Signed-off-by: Trond Myklebust
---
 net/sunrpc/clnt.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index cea1308a6fd..ef96568902c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2004,6 +2004,10 @@ call_status(struct rpc_task *task)
         case -EHOSTDOWN:
         case -EHOSTUNREACH:
         case -ENETUNREACH:
+                if (RPC_IS_SOFTCONN(task)) {
+                        rpc_exit(task, status);
+                        break;
+                }
                 /*
                  * Delay any retries for 3 seconds, then handle as if it
                  * were a timeout.
--
cgit v1.2.3-70-g09d2


From 1fa3e2eb9db07f30a605c66d1a2fdde4b24e74d5 Mon Sep 17 00:00:00 2001
From: Steve Dickson
Date: Thu, 20 Mar 2014 11:23:03 -0400
Subject: SUNRPC: Ensure call_connect_status() deals correctly with SOFTCONN tasks

Don't schedule an rpc_delay before checking to see if the task is a
SOFTCONN, because the tk_callback from the delay (__rpc_atrun) clears
the task status before rpc_exit_task can be run.
Signed-off-by: Steve Dickson
Fixes: 561ec1603171c (SUNRPC: call_connect_status should recheck...)
Link: http://lkml.kernel.org/r/5329CF7C.7090308@RedHat.com
Signed-off-by: Trond Myklebust
---
 net/sunrpc/clnt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ef96568902c..62f86793b70 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1802,10 +1802,10 @@ call_connect_status(struct rpc_task *task)
         case -ECONNABORTED:
         case -ENETUNREACH:
         case -EHOSTUNREACH:
-                /* retry with existing socket, after a delay */
-                rpc_delay(task, 3*HZ);
                 if (RPC_IS_SOFTCONN(task))
                         break;
+                /* retry with existing socket, after a delay */
+                rpc_delay(task, 3*HZ);
         case -EAGAIN:
--
cgit v1.2.3-70-g09d2


From 6bd144160a5554e4af052c153a094c4851a4c6aa Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Thu, 20 Mar 2014 12:53:54 -0400
Subject: SUNRPC: Don't let rpc_delay() clobber non-timeout errors

Signed-off-by: Trond Myklebust
---
 net/sunrpc/sched.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index ff3cc4bf4b2..25578afe154 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -637,7 +637,8 @@ static void __rpc_queue_timer_fn(unsigned long ptr)
 
 static void __rpc_atrun(struct rpc_task *task)
 {
-        task->tk_status = 0;
+        if (task->tk_status == -ETIMEDOUT)
+                task->tk_status = 0;
 }
 
 /*
--
cgit v1.2.3-70-g09d2


From 494314c415e2d3b308f57c9245ae6525166c70b8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Thu, 20 Mar 2014 12:59:09 -0400
Subject: SUNRPC: rpc_restart_call/rpc_restart_call_prepare should clear task->tk_status

When restarting an rpc call, we should not be carrying over data from
the previous call.

Signed-off-by: Trond Myklebust
---
 net/sunrpc/clnt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 62f86793b70..f400445d1a4 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1363,6 +1363,7 @@ rpc_restart_call_prepare(struct rpc_task *task)
         if (RPC_ASSASSINATED(task))
                 return 0;
         task->tk_action = call_start;
+        task->tk_status = 0;
         if (task->tk_ops->rpc_call_prepare != NULL)
                 task->tk_action = rpc_prepare_task;
         return 1;
@@ -1379,6 +1380,7 @@ rpc_restart_call(struct rpc_task *task)
         if (RPC_ASSASSINATED(task))
                 return 0;
         task->tk_action = call_start;
+        task->tk_status = 0;
         return 1;
 }
 EXPORT_SYMBOL_GPL(rpc_restart_call);
--
cgit v1.2.3-70-g09d2
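
The first patch in this series is easiest to see in miniature. The sketch
below is a minimal user-space model of its request-tracking scheme:
preallocated slots stay in the free pool while a callback is only partially
received, each incoming TCP segment looks the request up by XID, and a
request only leaves the pool once all of its data has arrived. The bc_req
structure, the slot array, and the helper names are simplified stand-ins
invented for illustration; the kernel code uses list_head chains, connect
cookies, and the locking shown in the first diff above.

#include <stdio.h>
#include <stdint.h>

#define BC_SLOTS 4

struct bc_req {
        uint32_t xid;           /* XID of the callback this slot is tracking */
        int      in_use;        /* set once the request has fully arrived */
        size_t   copied;        /* bytes received so far */
};

static struct bc_req slots[BC_SLOTS];

/* Find the slot already tracking @xid, or claim a fresh one. */
static struct bc_req *lookup_bc_request(uint32_t xid)
{
        int i;

        for (i = 0; i < BC_SLOTS; i++)  /* partially received? resume it */
                if (!slots[i].in_use && slots[i].copied && slots[i].xid == xid)
                        return &slots[i];
        for (i = 0; i < BC_SLOTS; i++)  /* otherwise claim an idle slot */
                if (!slots[i].in_use && !slots[i].copied) {
                        slots[i].xid = xid;
                        return &slots[i];
                }
        return NULL;                    /* slot table overflowed */
}

/* Deliver one TCP segment's worth of callback data for @xid. */
static void read_callback(uint32_t xid, size_t len, int last_segment)
{
        struct bc_req *req = lookup_bc_request(xid);

        if (req == NULL) {
                fprintf(stderr, "Callback slot table overflowed\n");
                return;
        }
        req->copied += len;
        if (last_segment) {
                req->in_use = 1;        /* only now leaves the free pool */
                printf("XID %08x complete: %zu bytes\n",
                       (unsigned)xid, req->copied);
        }
}

int main(void)
{
        /* One callback split across two segments, interleaved with another. */
        read_callback(0x1234, 100, 0);
        read_callback(0x5678, 400, 1);
        read_callback(0x1234, 200, 1);
        return 0;
}

Compiled and run, the model reports both callbacks completing even though
the first one's data arrives in two pieces around the second, which is
exactly the interleaving the kernel patch has to survive.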