From 02107148349f31eee7c0fb06fd7a880df73dbd20 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Jan 2006 09:55:49 +0100 Subject: SUNRPC: switchable buffer allocation Add RPC client transport switch support for replacing buffer management on a per-transport basis. In the current IPv4 socket transport implementation, RPC buffers are allocated as needed for each RPC message that is sent. Some transport implementations may choose to use pre-allocated buffers for encoding, sending, receiving, and unmarshalling RPC messages, however. For transports capable of direct data placement, the buffers can be carved out of a pre-registered area of memory rather than from a slab cache. Test-plan: Millions of fsx operations. Performance characterization with "sio" and "iozone". Use oprofile and other tools to look for significant regression in CPU utilization. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 14 ++++++++------ net/sunrpc/sched.c | 50 +++++++++++++++++++++++++++----------------------- net/sunrpc/xprt.c | 3 +++ net/sunrpc/xprtsock.c | 5 +++++ 4 files changed, 43 insertions(+), 29 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b23c0d328c9..25cba94c568 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -644,24 +644,26 @@ call_reserveresult(struct rpc_task *task) /* * 2. Allocate the buffer. For details, see sched.c:rpc_malloc. - * (Note: buffer memory is freed in rpc_task_release). + * (Note: buffer memory is freed in xprt_release). */ static void call_allocate(struct rpc_task *task) { + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = task->tk_xprt; unsigned int bufsiz; dprintk("RPC: %4d call_allocate (status %d)\n", task->tk_pid, task->tk_status); task->tk_action = call_bind; - if (task->tk_buffer) + if (req->rq_buffer) return; /* FIXME: compute buffer requirements more exactly using * auth->au_wslack */ bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; - if (rpc_malloc(task, bufsiz << 1) != NULL) + if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL) return; printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); @@ -704,14 +706,14 @@ call_encode(struct rpc_task *task) task->tk_pid, task->tk_status); /* Default buffer setup */ - bufsiz = task->tk_bufsize >> 1; - sndbuf->head[0].iov_base = (void *)task->tk_buffer; + bufsiz = req->rq_bufsize >> 1; + sndbuf->head[0].iov_base = (void *)req->rq_buffer; sndbuf->head[0].iov_len = bufsiz; sndbuf->tail[0].iov_len = 0; sndbuf->page_len = 0; sndbuf->len = 0; sndbuf->buflen = bufsiz; - rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); + rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz); rcvbuf->head[0].iov_len = bufsiz; rcvbuf->tail[0].iov_len = 0; rcvbuf->page_len = 0; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 48510e3ffa0..7415406aa1a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -41,8 +41,6 @@ static mempool_t *rpc_buffer_mempool __read_mostly; static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); -static void rpc_free(struct rpc_task *task); - static void rpc_async_schedule(void *); /* @@ -599,7 +597,6 @@ void rpc_exit_task(struct rpc_task *task) WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ xprt_release(task); - rpc_free(task); } } } @@ -724,17 +721,19 @@ static void rpc_async_schedule(void *arg) __rpc_execute((struct rpc_task *)arg); } -/* - * Allocate memory for RPC purposes. +/** + * rpc_malloc - allocate an RPC buffer + * @task: RPC task that will use this buffer + * @size: requested byte size * * We try to ensure that some NFS reads and writes can always proceed * by using a mempool when allocating 'small' buffers. * In order to avoid memory starvation triggering more writebacks of * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. */ -void * -rpc_malloc(struct rpc_task *task, size_t size) +void * rpc_malloc(struct rpc_task *task, size_t size) { + struct rpc_rqst *req = task->tk_rqstp; gfp_t gfp; if (task->tk_flags & RPC_TASK_SWAPPER) @@ -743,27 +742,33 @@ rpc_malloc(struct rpc_task *task, size_t size) gfp = GFP_NOFS; if (size > RPC_BUFFER_MAXSIZE) { - task->tk_buffer = kmalloc(size, gfp); - if (task->tk_buffer) - task->tk_bufsize = size; + req->rq_buffer = kmalloc(size, gfp); + if (req->rq_buffer) + req->rq_bufsize = size; } else { - task->tk_buffer = mempool_alloc(rpc_buffer_mempool, gfp); - if (task->tk_buffer) - task->tk_bufsize = RPC_BUFFER_MAXSIZE; + req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp); + if (req->rq_buffer) + req->rq_bufsize = RPC_BUFFER_MAXSIZE; } - return task->tk_buffer; + return req->rq_buffer; } -static void -rpc_free(struct rpc_task *task) +/** + * rpc_free - free buffer allocated via rpc_malloc + * @task: RPC task with a buffer to be freed + * + */ +void rpc_free(struct rpc_task *task) { - if (task->tk_buffer) { - if (task->tk_bufsize == RPC_BUFFER_MAXSIZE) - mempool_free(task->tk_buffer, rpc_buffer_mempool); + struct rpc_rqst *req = task->tk_rqstp; + + if (req->rq_buffer) { + if (req->rq_bufsize == RPC_BUFFER_MAXSIZE) + mempool_free(req->rq_buffer, rpc_buffer_mempool); else - kfree(task->tk_buffer); - task->tk_buffer = NULL; - task->tk_bufsize = 0; + kfree(req->rq_buffer); + req->rq_buffer = NULL; + req->rq_bufsize = 0; } } @@ -887,7 +892,6 @@ void rpc_release_task(struct rpc_task *task) xprt_release(task); if (task->tk_msg.rpc_cred) rpcauth_unbindcred(task); - rpc_free(task); if (task->tk_client) { rpc_release_client(task->tk_client); task->tk_client = NULL; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 6dda3860351..069a6cbd49e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -838,6 +838,8 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_timeout = xprt->timeout.to_initval; req->rq_task = task; req->rq_xprt = xprt; + req->rq_buffer = NULL; + req->rq_bufsize = 0; req->rq_xid = xprt_alloc_xid(xprt); req->rq_release_snd_buf = NULL; dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, @@ -867,6 +869,7 @@ void xprt_release(struct rpc_task *task) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); + xprt->ops->buf_free(task); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 77e8800d412..51f07c9a751 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1161,6 +1162,8 @@ static struct rpc_xprt_ops xs_udp_ops = { .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, .send_request = xs_udp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_rtt, .timer = xs_udp_timer, @@ -1173,6 +1176,8 @@ static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_close, -- cgit v1.2.3-70-g09d2