From 2397aa8b515f7bd77c8d5698170b6a98fdd6721c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:02:54 -0500 Subject: svcrdma: Clean up read chunk counting The byte_count argument is not used, and the function is called only from one place. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 975da754c77..2280325e4c8 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -178,8 +178,6 @@ struct svcxprt_rdma { #define RPCRDMA_MAX_REQ_SIZE 4096 /* svc_rdma_marshal.c */ -extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *, - int *, int *); extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, -- cgit v1.2.3-70-g09d2 From e54524111f51eac1900cf91aca3d38a92a6b11c0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:03:20 -0500 Subject: svcrdma: Plant reader function in struct svcxprt_rdma The RDMA reader function doesn't change once an svcxprt_rdma is instantiated. Instead of checking sc_devcap during every incoming RPC, set the reader function once when the connection is accepted. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 10 +++++ net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 71 ++++++++++++-------------------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 + 3 files changed, 39 insertions(+), 44 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 2280325e4c8..f161e309f25 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -150,6 +150,10 @@ struct svcxprt_rdma { struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; struct ib_mr *sc_phys_mr; /* MR for server memory */ + int (*sc_reader)(struct svcxprt_rdma *, + struct svc_rqst *, + struct svc_rdma_op_ctxt *, + int *, u32 *, u32, u32, u64, bool); u32 sc_dev_caps; /* distilled device caps */ u32 sc_dma_lkey; /* local dma key */ unsigned int sc_frmr_pg_list_len; @@ -195,6 +199,12 @@ extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); /* svc_rdma_recvfrom.c */ extern int svc_rdma_recvfrom(struct svc_rqst *); +extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *, + struct svc_rdma_op_ctxt *, int *, u32 *, + u32, u32, u64, bool); +extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, + struct svc_rdma_op_ctxt *, int *, u32 *, + u32, u32, u64, bool); /* svc_rdma_sendto.c */ extern int svc_rdma_sendto(struct svc_rqst *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 577f8659ca3..c3aebc1bf0a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -117,26 +117,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) return min_t(int, sge_count, xprt->sc_max_sge); } -typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last); - /* Issue an RDMA_READ using the local lkey to map the data sink */ -static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last) +int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + bool last) { struct ib_send_wr read_wr; int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; @@ -221,15 +211,15 @@ static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, } /* Issue an RDMA_READ using an FRMR to map the data sink */ -static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last) +int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + bool last) { struct ib_send_wr read_wr; struct ib_send_wr inv_wr; @@ -374,9 +364,9 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, { int page_no, ret; struct rpcrdma_read_chunk *ch; - u32 page_offset, byte_count; + u32 handle, page_offset, byte_count; u64 rs_offset; - rdma_reader_fn reader; + bool last; /* If no read list is present, return 0 */ ch = svc_rdma_get_read_chunk(rmsgp); @@ -399,27 +389,20 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, head->arg.len = rqstp->rq_arg.len; head->arg.buflen = rqstp->rq_arg.buflen; - /* Use FRMR if supported */ - if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) - reader = rdma_read_chunk_frmr; - else - reader = rdma_read_chunk_lcl; - page_no = 0; page_offset = 0; for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; ch->rc_discrim != 0; ch++) { - + handle = be32_to_cpu(ch->rc_target.rs_handle); + byte_count = be32_to_cpu(ch->rc_target.rs_length); xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, &rs_offset); - byte_count = ntohl(ch->rc_target.rs_length); while (byte_count > 0) { - ret = reader(xprt, rqstp, head, - &page_no, &page_offset, - ntohl(ch->rc_target.rs_handle), - byte_count, rs_offset, - ((ch+1)->rc_discrim == 0) /* last */ - ); + last = (ch + 1)->rc_discrim == xdr_zero; + ret = xprt->sc_reader(xprt, rqstp, head, + &page_no, &page_offset, + handle, byte_count, + rs_offset, last); if (ret < 0) goto err; byte_count -= ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f2e059bbab4..f609c1c2d38 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -974,10 +974,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) * NB: iWARP requires remote write access for the data sink * of an RDMA_READ. IB does not. */ + newxprt->sc_reader = rdma_read_chunk_lcl; if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { newxprt->sc_frmr_pg_list_len = devattr.max_fast_reg_page_list_len; newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; + newxprt->sc_reader = rdma_read_chunk_frmr; } /* -- cgit v1.2.3-70-g09d2 From 0b056c224bea63060ce8a981e84193c93fac6f5d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:03:37 -0500 Subject: svcrdma: Support RDMA_NOMSG requests Currently the Linux server can not decode RDMA_NOMSG type requests. Operations whose length exceeds the fixed size of RDMA SEND buffers, like large NFSv4 CREATE(NF4LNK) operations, must be conveyed via RDMA_NOMSG. For an RDMA_MSG type request, the client sends the RPC/RDMA, RPC headers, and some or all of the NFS arguments via RDMA SEND. For an RDMA_NOMSG type request, the client sends just the RPC/RDMA header via RDMA SEND. The request's read list contains elements for the entire RPC message, including the RPC header. NFSD expects the RPC/RMDA header and RPC header to be contiguous in page zero of the XDR buffer. Add logic in the RDMA READ path to make the read list contents land where the server prefers, when the incoming message is a type RDMA_NOMSG message. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 39 ++++++++++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 3 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f161e309f25..c343a94bc79 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -79,6 +79,7 @@ struct svc_rdma_op_ctxt { enum ib_wr_opcode wr_op; enum ib_wc_status wc_status; u32 byte_len; + u32 position; struct svcxprt_rdma *xprt; unsigned long flags; enum dma_data_direction direction; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index a67dd1a081d..36cf51a3eab 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -60,6 +60,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *ctxt, u32 byte_count) { + struct rpcrdma_msg *rmsgp; struct page *page; u32 bc; int sge_no; @@ -82,7 +83,14 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, /* If data remains, store it in the pagelist */ rqstp->rq_arg.page_len = bc; rqstp->rq_arg.page_base = 0; - rqstp->rq_arg.pages = &rqstp->rq_pages[1]; + + /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ + rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; + if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG) + rqstp->rq_arg.pages = &rqstp->rq_pages[0]; + else + rqstp->rq_arg.pages = &rqstp->rq_pages[1]; + sge_no = 1; while (bc && sge_no < ctxt->count) { page = ctxt->pages[sge_no]; @@ -383,7 +391,6 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, */ head->arg.head[0] = rqstp->rq_arg.head[0]; head->arg.tail[0] = rqstp->rq_arg.tail[0]; - head->arg.pages = &head->pages[head->count]; head->hdr_count = head->count; head->arg.page_base = 0; head->arg.page_len = 0; @@ -393,9 +400,17 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; position = be32_to_cpu(ch->rc_position); + /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ + if (position == 0) { + head->arg.pages = &head->pages[0]; + page_offset = head->byte_len; + } else { + head->arg.pages = &head->pages[head->count]; + page_offset = 0; + } + ret = 0; page_no = 0; - page_offset = 0; for (; ch->rc_discrim != xdr_zero; ch++) { if (be32_to_cpu(ch->rc_position) != position) goto err; @@ -418,7 +433,10 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, head->arg.buflen += ret; } } + ret = 1; + head->position = position; + err: /* Detach arg pages. svc_recv will replenish them */ for (page_no = 0; @@ -465,6 +483,21 @@ static int rdma_read_complete(struct svc_rqst *rqstp, put_page(rqstp->rq_pages[page_no]); rqstp->rq_pages[page_no] = head->pages[page_no]; } + + /* Adjustments made for RDMA_NOMSG type requests */ + if (head->position == 0) { + if (head->arg.len <= head->sge[0].length) { + head->arg.head[0].iov_len = head->arg.len - + head->byte_len; + head->arg.page_len = 0; + } else { + head->arg.head[0].iov_len = head->sge[0].length - + head->byte_len; + head->arg.page_len = head->arg.len - + head->sge[0].length; + } + } + /* Point rq_arg.pages past header */ rdma_fix_xdr_pad(&head->arg); rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; -- cgit v1.2.3-70-g09d2 From 3c5199143bc4b35f472c5c2534026d74821e2044 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 Jan 2015 08:19:32 -0500 Subject: sunrpc/lockd: fix references to the BKL The BKL is completely out of the picture in the lockd and sunrpc code these days. Update the antiquated comments that refer to it. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/lockd/svclock.c | 4 ++-- include/linux/sunrpc/svc.h | 2 +- net/sunrpc/svc.c | 4 ++-- net/sunrpc/svc_xprt.c | 3 +-- 4 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 56598742dde..5581e020644 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -57,8 +57,8 @@ static DEFINE_SPINLOCK(nlm_blocked_lock); static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) { /* - * We can get away with a static buffer because we're only - * called with BKL held. + * We can get away with a static buffer because this is only called + * from lockd, which is single-threaded. */ static char buf[2*NLM_MAXCOOKIELEN+1]; unsigned int i, len = sizeof(buf); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 6f22cfeef5e..fae6fb947fc 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -110,7 +110,7 @@ struct svc_serv { * We use sv_nrthreads as a reference count. svc_destroy() drops * this refcount, so we need to bump it up around operations that * change the number of threads. Horrible, but there it is. - * Should be called with the BKL held. + * Should be called with the "service mutex" held. */ static inline void svc_get(struct svc_serv *serv) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 91eaef1844c..78974e4d9ad 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -768,8 +768,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) EXPORT_SYMBOL_GPL(svc_set_num_threads); /* - * Called from a server thread as it's exiting. Caller must hold the BKL or - * the "service mutex", whichever is appropriate for the service. + * Called from a server thread as it's exiting. Caller must hold the "service + * mutex" for the service. */ void svc_exit_thread(struct svc_rqst *rqstp) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c69358b3cf7..163ac45c363 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -42,7 +42,7 @@ static LIST_HEAD(svc_xprt_class_list); * svc_pool->sp_lock protects most of the fields of that pool. * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. * when both need to be taken (rare), svc_serv->sv_lock is first. - * BKL protects svc_serv->sv_nrthread. + * The "service mutex" protects svc_serv->sv_nrthread. * svc_sock->sk_lock protects the svc_sock->sk_deferred list * and the ->sk_info_authunix cache. * @@ -67,7 +67,6 @@ static LIST_HEAD(svc_xprt_class_list); * that no other thread will be using the transport or will * try to set XPT_DEAD. */ - int svc_reg_xprt_class(struct svc_xprt_class *xcl) { struct svc_xprt_class *cl; -- cgit v1.2.3-70-g09d2