diff options
Diffstat (limited to 'include/linux/sunrpc')
-rw-r--r-- | include/linux/sunrpc/cache.h | 4 | ||||
-rw-r--r-- | include/linux/sunrpc/clnt.h | 15 | ||||
-rw-r--r-- | include/linux/sunrpc/debug.h | 2 | ||||
-rw-r--r-- | include/linux/sunrpc/msg_prot.h | 39 | ||||
-rw-r--r-- | include/linux/sunrpc/sched.h | 59 | ||||
-rw-r--r-- | include/linux/sunrpc/svc.h | 10 | ||||
-rw-r--r-- | include/linux/sunrpc/svc_rdma.h | 262 | ||||
-rw-r--r-- | include/linux/sunrpc/svc_xprt.h | 159 | ||||
-rw-r--r-- | include/linux/sunrpc/svcsock.h | 43 | ||||
-rw-r--r-- | include/linux/sunrpc/xdr.h | 3 | ||||
-rw-r--r-- | include/linux/sunrpc/xprt.h | 14 |
11 files changed, 507 insertions, 103 deletions
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index bd7a6b0a87a..03547d6abee 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -169,8 +169,8 @@ extern int cache_check(struct cache_detail *detail, extern void cache_flush(void); extern void cache_purge(struct cache_detail *detail); #define NEVER (0x7FFFFFFF) -extern void cache_register(struct cache_detail *cd); -extern int cache_unregister(struct cache_detail *cd); +extern int cache_register(struct cache_detail *cd); +extern void cache_unregister(struct cache_detail *cd); extern void qword_add(char **bpp, int *lp, char *str); extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index d9d5c5ad826..129a86e25d2 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -41,11 +41,11 @@ struct rpc_clnt { struct rpc_iostats * cl_metrics; /* per-client statistics */ unsigned int cl_softrtry : 1,/* soft timeouts */ - cl_intr : 1,/* interruptible */ cl_discrtry : 1,/* disconnect before retry */ cl_autobind : 1;/* use getport() */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ + const struct rpc_timeout *cl_timeout; /* Timeout strategy */ int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; @@ -54,6 +54,7 @@ struct rpc_clnt { struct dentry * cl_dentry; /* inode */ struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; + struct rpc_timeout cl_timeout_default; struct rpc_program * cl_program; char cl_inline_name[32]; }; @@ -99,7 +100,7 @@ struct rpc_create_args { struct sockaddr *address; size_t addrsize; struct sockaddr *saddress; - struct rpc_timeout *timeout; + const struct rpc_timeout *timeout; char *servername; struct rpc_program *program; u32 version; @@ -109,7 +110,6 @@ struct rpc_create_args { /* Values for "flags" field */ #define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) -#define RPC_CLNT_CREATE_INTR (1UL << 1) #define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) #define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3) #define RPC_CLNT_CREATE_NOPING (1UL << 4) @@ -123,11 +123,10 @@ void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); int rpcb_register(u32, u32, int, unsigned short, int *); -int rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int); +int rpcb_getport_sync(struct sockaddr_in *, u32, u32, int); void rpcb_getport_async(struct rpc_task *); -void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); - +void rpc_call_start(struct rpc_task *); int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, const struct rpc_call_ops *tk_ops, void *calldata); @@ -136,13 +135,11 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags); void rpc_restart_call(struct rpc_task *); -void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset); -void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); -char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); +const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 3912cf16361..10709cbe96f 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -20,7 +20,7 @@ #define RPCDBG_BIND 0x0020 #define RPCDBG_SCHED 0x0040 #define RPCDBG_TRANS 0x0080 -#define RPCDBG_SVCSOCK 0x0100 +#define RPCDBG_SVCXPRT 0x0100 #define RPCDBG_SVCDSP 0x0200 #define RPCDBG_MISC 0x0400 #define RPCDBG_CACHE 0x0800 diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index c4beb577511..70df4f1d884 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -152,5 +152,44 @@ typedef __be32 rpc_fraghdr; */ #define RPCBIND_MAXNETIDLEN (4u) +/* + * Universal addresses are introduced in RFC 1833 and further spelled + * out in RFC 3530. RPCBIND_MAXUADDRLEN defines a maximum byte length + * of a universal address for use in allocating buffers and character + * arrays. + * + * Quoting RFC 3530, section 2.2: + * + * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the + * US-ASCII string: + * + * h1.h2.h3.h4.p1.p2 + * + * The prefix, "h1.h2.h3.h4", is the standard textual form for + * representing an IPv4 address, which is always four octets long. + * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively, + * the first through fourth octets each converted to ASCII-decimal. + * Assuming big-endian ordering, p1 and p2 are, respectively, the first + * and second octets each converted to ASCII-decimal. For example, if a + * host, in big-endian order, has an address of 0x0A010307 and there is + * a service listening on, in big endian order, port 0x020F (decimal + * 527), then the complete universal address is "10.1.3.7.2.15". + * + * ... + * + * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the + * US-ASCII string: + * + * x1:x2:x3:x4:x5:x6:x7:x8.p1.p2 + * + * The suffix "p1.p2" is the service port, and is computed the same way + * as with universal addresses for TCP and UDP over IPv4. The prefix, + * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for + * representing an IPv6 address as defined in Section 2.2 of [RFC2373]. + * Additionally, the two alternative forms specified in Section 2.2 of + * [RFC2373] are also acceptable. + */ +#define RPCBIND_MAXUADDRLEN (56u) + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 8ea077db009..f689f02e679 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -56,8 +56,6 @@ struct rpc_task { __u8 tk_garb_retry; __u8 tk_cred_retry; - unsigned long tk_cookie; /* Cookie for batching tasks */ - /* * timeout_fn to be executed by timer bottom half * callback to be executed after waking up @@ -78,7 +76,6 @@ struct rpc_task { struct timer_list tk_timer; /* kernel timer */ unsigned long tk_timeout; /* timeout for rpc_sleep() */ unsigned short tk_flags; /* misc flags */ - unsigned char tk_priority : 2;/* Task priority */ unsigned long tk_runstate; /* Task run status */ struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could * be any workqueue @@ -94,6 +91,9 @@ struct rpc_task { unsigned long tk_start; /* RPC task init timestamp */ long tk_rtt; /* round-trip time (jiffies) */ + pid_t tk_owner; /* Process id for batching tasks */ + unsigned char tk_priority : 2;/* Task priority */ + #ifdef RPC_DEBUG unsigned short tk_pid; /* debugging aid */ #endif @@ -117,6 +117,15 @@ struct rpc_call_ops { void (*rpc_release)(void *); }; +struct rpc_task_setup { + struct rpc_task *task; + struct rpc_clnt *rpc_client; + const struct rpc_message *rpc_message; + const struct rpc_call_ops *callback_ops; + void *callback_data; + unsigned short flags; + signed char priority; +}; /* * RPC task flags @@ -128,7 +137,6 @@ struct rpc_call_ops { #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ #define RPC_TASK_KILLED 0x0100 /* task was killed */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ -#define RPC_TASK_NOINTR 0x0400 /* uninterruptible task */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) @@ -136,7 +144,6 @@ struct rpc_call_ops { #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) #define RPC_DO_CALLBACK(t) ((t)->tk_callback != NULL) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) -#define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR) #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 @@ -180,10 +187,10 @@ struct rpc_call_ops { * Note: if you change these, you must also change * the task initialization definitions below. */ -#define RPC_PRIORITY_LOW 0 -#define RPC_PRIORITY_NORMAL 1 -#define RPC_PRIORITY_HIGH 2 -#define RPC_NR_PRIORITY (RPC_PRIORITY_HIGH+1) +#define RPC_PRIORITY_LOW (-1) +#define RPC_PRIORITY_NORMAL (0) +#define RPC_PRIORITY_HIGH (1) +#define RPC_NR_PRIORITY (1 + RPC_PRIORITY_HIGH - RPC_PRIORITY_LOW) /* * RPC synchronization objects @@ -191,7 +198,7 @@ struct rpc_call_ops { struct rpc_wait_queue { spinlock_t lock; struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */ - unsigned long cookie; /* cookie of last task serviced */ + pid_t owner; /* process id of last task serviced */ unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char count; /* # task groups remaining serviced so far */ @@ -208,41 +215,13 @@ struct rpc_wait_queue { * performance of NFS operations such as read/write. */ #define RPC_BATCH_COUNT 16 - -#ifndef RPC_DEBUG -# define RPC_WAITQ_INIT(var,qname) { \ - .lock = __SPIN_LOCK_UNLOCKED(var.lock), \ - .tasks = { \ - [0] = LIST_HEAD_INIT(var.tasks[0]), \ - [1] = LIST_HEAD_INIT(var.tasks[1]), \ - [2] = LIST_HEAD_INIT(var.tasks[2]), \ - }, \ - } -#else -# define RPC_WAITQ_INIT(var,qname) { \ - .lock = __SPIN_LOCK_UNLOCKED(var.lock), \ - .tasks = { \ - [0] = LIST_HEAD_INIT(var.tasks[0]), \ - [1] = LIST_HEAD_INIT(var.tasks[1]), \ - [2] = LIST_HEAD_INIT(var.tasks[2]), \ - }, \ - .name = qname, \ - } -#endif -# define RPC_WAITQ(var,qname) struct rpc_wait_queue var = RPC_WAITQ_INIT(var,qname) - #define RPC_IS_PRIORITY(q) ((q)->maxpriority > 0) /* * Function prototypes */ -struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags, - const struct rpc_call_ops *ops, void *data); -struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, - const struct rpc_call_ops *ops, void *data); -void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, - int flags, const struct rpc_call_ops *ops, - void *data); +struct rpc_task *rpc_new_task(const struct rpc_task_setup *); +struct rpc_task *rpc_run_task(const struct rpc_task_setup *); void rpc_put_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_release_calldata(const struct rpc_call_ops *, void *); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 8531a70da73..64c77105618 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -204,7 +204,7 @@ union svc_addr_u { struct svc_rqst { struct list_head rq_list; /* idle list */ struct list_head rq_all; /* all threads list */ - struct svc_sock * rq_sock; /* socket */ + struct svc_xprt * rq_xprt; /* transport ptr */ struct sockaddr_storage rq_addr; /* peer address */ size_t rq_addrlen; @@ -214,9 +214,10 @@ struct svc_rqst { struct auth_ops * rq_authop; /* authentication flavour */ u32 rq_flavor; /* pseudoflavor */ struct svc_cred rq_cred; /* auth info */ - struct sk_buff * rq_skbuff; /* fast recv inet buffer */ + void * rq_xprt_ctxt; /* transport specific context ptr */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ + size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; struct xdr_buf rq_res; struct page * rq_pages[RPCSVC_MAXPAGES]; @@ -317,11 +318,12 @@ static inline void svc_free_res_pages(struct svc_rqst *rqstp) struct svc_deferred_req { u32 prot; /* protocol (UDP or TCP) */ - struct svc_sock *svsk; + struct svc_xprt *xprt; struct sockaddr_storage addr; /* where reply must go */ size_t addrlen; union svc_addr_u daddr; /* where reply must come from */ struct cache_deferred_req handle; + size_t xprt_hlen; int argslen; __be32 args[0]; }; @@ -382,6 +384,8 @@ struct svc_procedure { */ struct svc_serv * svc_create(struct svc_program *, unsigned int, void (*shutdown)(struct svc_serv*)); +struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, + struct svc_pool *pool); int svc_create_thread(svc_thread_fn, struct svc_serv *); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h new file mode 100644 index 00000000000..c11bbcc081f --- /dev/null +++ b/include/linux/sunrpc/svc_rdma.h @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Author: Tom Tucker <tom@opengridcomputing.com> + */ + +#ifndef SVC_RDMA_H +#define SVC_RDMA_H +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/svcsock.h> +#include <linux/sunrpc/rpc_rdma.h> +#include <rdma/ib_verbs.h> +#include <rdma/rdma_cm.h> +#define SVCRDMA_DEBUG + +/* RPC/RDMA parameters and stats */ +extern unsigned int svcrdma_ord; +extern unsigned int svcrdma_max_requests; +extern unsigned int svcrdma_max_req_size; + +extern atomic_t rdma_stat_recv; +extern atomic_t rdma_stat_read; +extern atomic_t rdma_stat_write; +extern atomic_t rdma_stat_sq_starve; +extern atomic_t rdma_stat_rq_starve; +extern atomic_t rdma_stat_rq_poll; +extern atomic_t rdma_stat_rq_prod; +extern atomic_t rdma_stat_sq_poll; +extern atomic_t rdma_stat_sq_prod; + +#define RPCRDMA_VERSION 1 + +/* + * Contexts are built when an RDMA request is created and are a + * record of the resources that can be recovered when the request + * completes. + */ +struct svc_rdma_op_ctxt { + struct svc_rdma_op_ctxt *next; + struct xdr_buf arg; + struct list_head dto_q; + enum ib_wr_opcode wr_op; + enum ib_wc_status wc_status; + u32 byte_len; + struct svcxprt_rdma *xprt; + unsigned long flags; + enum dma_data_direction direction; + int count; + struct ib_sge sge[RPCSVC_MAXPAGES]; + struct page *pages[RPCSVC_MAXPAGES]; +}; + +#define RDMACTXT_F_READ_DONE 1 +#define RDMACTXT_F_LAST_CTXT 2 + +struct svcxprt_rdma { + struct svc_xprt sc_xprt; /* SVC transport structure */ + struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ + struct list_head sc_accept_q; /* Conn. waiting accept */ + int sc_ord; /* RDMA read limit */ + wait_queue_head_t sc_read_wait; + int sc_max_sge; + + int sc_sq_depth; /* Depth of SQ */ + atomic_t sc_sq_count; /* Number of SQ WR on queue */ + + int sc_max_requests; /* Depth of RQ */ + int sc_max_req_size; /* Size of each RQ WR buf */ + + struct ib_pd *sc_pd; + + struct svc_rdma_op_ctxt *sc_ctxt_head; + int sc_ctxt_cnt; + int sc_ctxt_bump; + int sc_ctxt_max; + spinlock_t sc_ctxt_lock; + struct list_head sc_rq_dto_q; + spinlock_t sc_rq_dto_lock; + struct ib_qp *sc_qp; + struct ib_cq *sc_rq_cq; + struct ib_cq *sc_sq_cq; + struct ib_mr *sc_phys_mr; /* MR for server memory */ + + spinlock_t sc_lock; /* transport lock */ + + wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ + unsigned long sc_flags; + struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */ + struct list_head sc_read_complete_q; + spinlock_t sc_read_complete_lock; +}; +/* sc_flags */ +#define RDMAXPRT_RQ_PENDING 1 +#define RDMAXPRT_SQ_PENDING 2 +#define RDMAXPRT_CONN_PENDING 3 + +#define RPCRDMA_LISTEN_BACKLOG 10 +/* The default ORD value is based on two outstanding full-size writes with a + * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ +#define RPCRDMA_ORD (64/4) +#define RPCRDMA_SQ_DEPTH_MULT 8 +#define RPCRDMA_MAX_THREADS 16 +#define RPCRDMA_MAX_REQUESTS 16 +#define RPCRDMA_MAX_REQ_SIZE 4096 + +/* svc_rdma_marshal.c */ +extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *, + int *, int *); +extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); +extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); +extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, + struct rpcrdma_msg *, + enum rpcrdma_errcode, u32 *); +extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); +extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); +extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, + u32, u64, u32); +extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *, + struct rpcrdma_msg *, + struct rpcrdma_msg *, + enum rpcrdma_proc); +extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); + +/* svc_rdma_recvfrom.c */ +extern int svc_rdma_recvfrom(struct svc_rqst *); + +/* svc_rdma_sendto.c */ +extern int svc_rdma_sendto(struct svc_rqst *); + +/* svc_rdma_transport.c */ +extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); +extern int svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, + enum rpcrdma_errcode); +struct page *svc_rdma_get_page(void); +extern int svc_rdma_post_recv(struct svcxprt_rdma *); +extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); +extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); +extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); +extern void svc_sq_reap(struct svcxprt_rdma *); +extern void svc_rq_reap(struct svcxprt_rdma *); +extern struct svc_xprt_class svc_rdma_class; +extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); + +/* svc_rdma.c */ +extern int svc_rdma_init(void); +extern void svc_rdma_cleanup(void); + +/* + * Returns the address of the first read chunk or <nul> if no read chunk is + * present + */ +static inline struct rpcrdma_read_chunk * +svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) +{ + struct rpcrdma_read_chunk *ch = + (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + + if (ch->rc_discrim == 0) + return NULL; + + return ch; +} + +/* + * Returns the address of the first read write array element or <nul> if no + * write array list is present + */ +static inline struct rpcrdma_write_array * +svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) +{ + if (rmsgp->rm_body.rm_chunks[0] != 0 + || rmsgp->rm_body.rm_chunks[1] == 0) + return NULL; + + return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; +} + +/* + * Returns the address of the first reply array element or <nul> if no + * reply array is present + */ +static inline struct rpcrdma_write_array * +svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) +{ + struct rpcrdma_read_chunk *rch; + struct rpcrdma_write_array *wr_ary; + struct rpcrdma_write_array *rp_ary; + + /* XXX: Need to fix when reply list may occur with read-list and/or + * write list */ + if (rmsgp->rm_body.rm_chunks[0] != 0 || + rmsgp->rm_body.rm_chunks[1] != 0) + return NULL; + + rch = svc_rdma_get_read_chunk(rmsgp); + if (rch) { + while (rch->rc_discrim) + rch++; + + /* The reply list follows an empty write array located + * at 'rc_position' here. The reply array is at rc_target. + */ + rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; + + goto found_it; + } + + wr_ary = svc_rdma_get_write_array(rmsgp); + if (wr_ary) { + rp_ary = (struct rpcrdma_write_array *) + &wr_ary-> + wc_array[wr_ary->wc_nchunks].wc_target.rs_length; + + goto found_it; + } + + /* No read list, no write list */ + rp_ary = (struct rpcrdma_write_array *) + &rmsgp->rm_body.rm_chunks[2]; + + found_it: + if (rp_ary->wc_discrim == 0) + return NULL; + + return rp_ary; +} +#endif diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h new file mode 100644 index 00000000000..6fd7b016517 --- /dev/null +++ b/include/linux/sunrpc/svc_xprt.h @@ -0,0 +1,159 @@ +/* + * linux/include/linux/sunrpc/svc_xprt.h + * + * RPC server transport I/O + */ + +#ifndef SUNRPC_SVC_XPRT_H +#define SUNRPC_SVC_XPRT_H + +#include <linux/sunrpc/svc.h> +#include <linux/module.h> + +struct svc_xprt_ops { + struct svc_xprt *(*xpo_create)(struct svc_serv *, + struct sockaddr *, int, + int); + struct svc_xprt *(*xpo_accept)(struct svc_xprt *); + int (*xpo_has_wspace)(struct svc_xprt *); + int (*xpo_recvfrom)(struct svc_rqst *); + void (*xpo_prep_reply_hdr)(struct svc_rqst *); + int (*xpo_sendto)(struct svc_rqst *); + void (*xpo_release_rqst)(struct svc_rqst *); + void (*xpo_detach)(struct svc_xprt *); + void (*xpo_free)(struct svc_xprt *); +}; + +struct svc_xprt_class { + const char *xcl_name; + struct module *xcl_owner; + struct svc_xprt_ops *xcl_ops; + struct list_head xcl_list; + u32 xcl_max_payload; +}; + +struct svc_xprt { + struct svc_xprt_class *xpt_class; + struct svc_xprt_ops *xpt_ops; + struct kref xpt_ref; + struct list_head xpt_list; + struct list_head xpt_ready; + unsigned long xpt_flags; +#define XPT_BUSY 0 /* enqueued/receiving */ +#define XPT_CONN 1 /* conn pending */ +#define XPT_CLOSE 2 /* dead or dying */ +#define XPT_DATA 3 /* data pending */ +#define XPT_TEMP 4 /* connected transport */ +#define XPT_DEAD 6 /* transport closed */ +#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */ +#define XPT_DEFERRED 8 /* deferred request pending */ +#define XPT_OLD 9 /* used for xprt aging mark+sweep */ +#define XPT_DETACHED 10 /* detached from tempsocks list */ +#define XPT_LISTENER 11 /* listening endpoint */ +#define XPT_CACHE_AUTH 12 /* cache auth info */ + + struct svc_pool *xpt_pool; /* current pool iff queued */ + struct svc_serv *xpt_server; /* service for transport */ + atomic_t xpt_reserved; /* space on outq that is rsvd */ + struct mutex xpt_mutex; /* to serialize sending data */ + spinlock_t xpt_lock; /* protects sk_deferred + * and xpt_auth_cache */ + void *xpt_auth_cache;/* auth cache */ + struct list_head xpt_deferred; /* deferred requests that need + * to be revisted */ + struct sockaddr_storage xpt_local; /* local address */ + size_t xpt_locallen; /* length of address */ + struct sockaddr_storage xpt_remote; /* remote peer's address */ + size_t xpt_remotelen; /* length of address */ +}; + +int svc_reg_xprt_class(struct svc_xprt_class *); +void svc_unreg_xprt_class(struct svc_xprt_class *); +void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, + struct svc_serv *); +int svc_create_xprt(struct svc_serv *, char *, unsigned short, int); +void svc_xprt_enqueue(struct svc_xprt *xprt); +void svc_xprt_received(struct svc_xprt *); +void svc_xprt_put(struct svc_xprt *xprt); +void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); +void svc_close_xprt(struct svc_xprt *xprt); +void svc_delete_xprt(struct svc_xprt *xprt); +int svc_port_is_privileged(struct sockaddr *sin); +int svc_print_xprts(char *buf, int maxlen); +struct svc_xprt *svc_find_xprt(struct svc_serv *, char *, int, int); +int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen); + +static inline void svc_xprt_get(struct svc_xprt *xprt) +{ + kref_get(&xprt->xpt_ref); +} +static inline void svc_xprt_set_local(struct svc_xprt *xprt, + struct sockaddr *sa, int salen) +{ + memcpy(&xprt->xpt_local, sa, salen); + xprt->xpt_locallen = salen; +} +static inline void svc_xprt_set_remote(struct svc_xprt *xprt, + struct sockaddr *sa, int salen) +{ + memcpy(&xprt->xpt_remote, sa, salen); + xprt->xpt_remotelen = salen; +} +static inline unsigned short svc_addr_port(struct sockaddr *sa) +{ + unsigned short ret = 0; + switch (sa->sa_family) { + case AF_INET: + ret = ntohs(((struct sockaddr_in *)sa)->sin_port); + break; + case AF_INET6: + ret = ntohs(((struct sockaddr_in6 *)sa)->sin6_port); + break; + } + return ret; +} + +static inline size_t svc_addr_len(struct sockaddr *sa) +{ + switch (sa->sa_family) { + case AF_INET: + return sizeof(struct sockaddr_in); + case AF_INET6: + return sizeof(struct sockaddr_in6); + } + return -EAFNOSUPPORT; +} + +static inline unsigned short svc_xprt_local_port(struct svc_xprt *xprt) +{ + return svc_addr_port((struct sockaddr *)&xprt->xpt_local); +} + +static inline unsigned short svc_xprt_remote_port(struct svc_xprt *xprt) +{ + return svc_addr_port((struct sockaddr *)&xprt->xpt_remote); +} + +static inline char *__svc_print_addr(struct sockaddr *addr, + char *buf, size_t len) +{ + switch (addr->sa_family) { + case AF_INET: + snprintf(buf, len, "%u.%u.%u.%u, port=%u", + NIPQUAD(((struct sockaddr_in *) addr)->sin_addr), + ntohs(((struct sockaddr_in *) addr)->sin_port)); + break; + + case AF_INET6: + snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u", + NIP6(((struct sockaddr_in6 *) addr)->sin6_addr), + ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); + break; + + default: + snprintf(buf, len, "unknown address type: %d", addr->sa_family); + break; + } + return buf; +} +#endif /* SUNRPC_SVC_XPRT_H */ diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index a53e0fa855d..206f092ad4c 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -10,42 +10,16 @@ #define SUNRPC_SVCSOCK_H #include <linux/sunrpc/svc.h> +#include <linux/sunrpc/svc_xprt.h> /* * RPC server socket. */ struct svc_sock { - struct list_head sk_ready; /* list of ready sockets */ - struct list_head sk_list; /* list of all sockets */ + struct svc_xprt sk_xprt; struct socket * sk_sock; /* berkeley socket layer */ struct sock * sk_sk; /* INET layer */ - struct svc_pool * sk_pool; /* current pool iff queued */ - struct svc_serv * sk_server; /* service for this socket */ - atomic_t sk_inuse; /* use count */ - unsigned long sk_flags; -#define SK_BUSY 0 /* enqueued/receiving */ -#define SK_CONN 1 /* conn pending */ -#define SK_CLOSE 2 /* dead or dying */ -#define SK_DATA 3 /* data pending */ -#define SK_TEMP 4 /* temp (TCP) socket */ -#define SK_DEAD 6 /* socket closed */ -#define SK_CHNGBUF 7 /* need to change snd/rcv buffer sizes */ -#define SK_DEFERRED 8 /* request on sk_deferred */ -#define SK_OLD 9 /* used for temp socket aging mark+sweep */ -#define SK_DETACHED 10 /* detached from tempsocks list */ - - atomic_t sk_reserved; /* space on outq that is reserved */ - - spinlock_t sk_lock; /* protects sk_deferred and - * sk_info_authunix */ - struct list_head sk_deferred; /* deferred requests that need to - * be revisted */ - struct mutex sk_mutex; /* to serialize sending data */ - - int (*sk_recvfrom)(struct svc_rqst *rqstp); - int (*sk_sendto)(struct svc_rqst *rqstp); - /* We keep the old state_change and data_ready CB's here */ void (*sk_ostate)(struct sock *); void (*sk_odata)(struct sock *, int bytes); @@ -54,21 +28,12 @@ struct svc_sock { /* private TCP part */ int sk_reclen; /* length of record */ int sk_tcplen; /* current read length */ - time_t sk_lastrecv; /* time of last received request */ - - /* cache of various info for TCP sockets */ - void *sk_info_authunix; - - struct sockaddr_storage sk_local; /* local address */ - struct sockaddr_storage sk_remote; /* remote peer's address */ - int sk_remotelen; /* length of address */ }; /* * Function prototypes. */ -int svc_makesock(struct svc_serv *, int, unsigned short, int flags); -void svc_force_close_socket(struct svc_sock *); +void svc_close_all(struct list_head *); int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); @@ -78,6 +43,8 @@ int svc_addsock(struct svc_serv *serv, int fd, char *name_return, int *proto); +void svc_init_xprt_sock(void); +void svc_cleanup_xprt_sock(void); /* * svc_makesock socket characteristics diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 0751c9464d0..e4057d729f0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -112,7 +112,8 @@ struct xdr_buf { __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_string(__be32 *p, const char *s); -__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen); +__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp, + unsigned int maxlen); __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 30b17b3bc1a..b3ff9a815e6 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -120,7 +120,7 @@ struct rpc_xprt { struct kref kref; /* Reference count */ struct rpc_xprt_ops * ops; /* transport methods */ - struct rpc_timeout timeout; /* timeout parms */ + const struct rpc_timeout *timeout; /* timeout parms */ struct sockaddr_storage addr; /* server address */ size_t addrlen; /* size of server address */ int prot; /* IP protocol */ @@ -183,7 +183,7 @@ struct rpc_xprt { bklog_u; /* backlog queue utilization */ } stat; - char * address_strings[RPC_DISPLAY_MAX]; + const char *address_strings[RPC_DISPLAY_MAX]; }; struct xprt_create { @@ -191,7 +191,6 @@ struct xprt_create { struct sockaddr * srcaddr; /* optional local address */ struct sockaddr * dstaddr; /* remote peer address */ size_t addrlen; - struct rpc_timeout * timeout; /* optional timeout parameters */ }; struct xprt_class { @@ -203,11 +202,6 @@ struct xprt_class { }; /* - * Transport operations used by ULPs - */ -void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr); - -/* * Generic internal transport functions */ struct rpc_xprt *xprt_create_transport(struct xprt_create *args); @@ -245,7 +239,8 @@ void xprt_adjust_cwnd(struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); void xprt_complete_rqst(struct rpc_task *task, int copied); void xprt_release_rqst_cong(struct rpc_task *task); -void xprt_disconnect(struct rpc_xprt *xprt); +void xprt_disconnect_done(struct rpc_xprt *xprt); +void xprt_force_disconnect(struct rpc_xprt *xprt); /* * Reserved bit positions in xprt->state @@ -256,6 +251,7 @@ void xprt_disconnect(struct rpc_xprt *xprt); #define XPRT_CLOSE_WAIT (3) #define XPRT_BOUND (4) #define XPRT_BINDING (5) +#define XPRT_CLOSING (6) static inline void xprt_set_connected(struct rpc_xprt *xprt) { |