diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-10 15:13:13 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-10 15:13:13 -0800 |
commit | e20db597b6264de55ea6636fc79b1e4aaa89d129 (patch) | |
tree | fa650cb9cfad4b22598ae9867ca92baf57491a9d | |
parent | 3a5dc1fafb016560315fe45bb4ef8bde259dd1bc (diff) | |
parent | 388f0c776781fe64ce951701bfe712b2182a31f2 (diff) |
Merge tag 'nfs-for-3.19-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Highlights include:
Features:
- NFSv4.2 client support for hole punching and preallocation.
- Further RPC/RDMA client improvements.
- Add more RPC transport debugging tracepoints.
- Add RPC debugging tools in debugfs.
Bugfixes:
- Stable fix for layoutget error handling
- Fix a change in COMMIT behaviour resulting from the recent io code
updates"
* tag 'nfs-for-3.19-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (28 commits)
sunrpc: add a debugfs rpc_xprt directory with an info file in it
sunrpc: add debugfs file for displaying client rpc_task queue
nfs: Add DEALLOCATE support
nfs: Add ALLOCATE support
NFS: Clean up nfs4_init_callback()
NFS: SETCLIENTID XDR buffer sizes are incorrect
SUNRPC: serialize iostats updates
xprtrdma: Display async errors
xprtrdma: Enable pad optimization
xprtrdma: Re-write rpcrdma_flush_cqs()
xprtrdma: Refactor tasklet scheduling
xprtrdma: unmap all FMRs during transport disconnect
xprtrdma: Cap req_cqinit
xprtrdma: Return an errno from rpcrdma_register_external()
nfs: define nfs_inc_fscache_stats and using it as possible
nfs: replace nfs_add_stats with nfs_inc_stats when add one
NFS: Deletion of unnecessary checks before the function call "nfs_put_client"
sunrpc: eliminate RPC_TRACEPOINTS
sunrpc: eliminate RPC_DEBUG
lockd: eliminate LOCKD_DEBUG
...
66 files changed, 1171 insertions, 244 deletions
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 13db95f5417..56598742dde 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -53,7 +53,7 @@ static const struct rpc_call_ops nlmsvc_grant_ops; static LIST_HEAD(nlm_blocked); static DEFINE_SPINLOCK(nlm_blocked_lock); -#ifdef LOCKD_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) { /* diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 4f46f7a0528..77fec6a55f5 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -812,7 +812,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) /* Optimize common case that writes from 0 to end of file */ end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); - if (end != NFS_I(inode)->npages) { + if (end != inode->i_mapping->nrpages) { rcu_read_lock(); end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX); rcu_read_unlock(); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 73466b93409..e36a9d78ea4 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -49,7 +49,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, goto out_iput; res->size = i_size_read(inode); res->change_attr = delegation->change_attr; - if (nfsi->npages != 0) + if (nfsi->nrequests != 0) res->change_attr++; res->ctime = inode->i_ctime; res->mtime = inode->i_mtime; diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 9bb806a76d9..bfecac781f1 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -204,8 +204,7 @@ destroy_ds(struct nfs4_pnfs_ds *ds) ifdebug(FACILITY) print_ds(ds); - if (ds->ds_clp) - nfs_put_client(ds->ds_clp); + nfs_put_client(ds->ds_clp); while (!list_empty(&ds->ds_addrs)) { da = list_first_entry(&ds->ds_addrs, diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 3ef01f0ba0b..d63bea8bbfb 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -269,8 +269,8 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp) if (!fscache_maybe_release_page(cookie, page, gfp)) return 0; - nfs_add_fscache_stats(page->mapping->host, - NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED); } return 1; @@ -293,8 +293,8 @@ void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode) BUG_ON(!PageLocked(page)); fscache_uncache_page(cookie, page); - nfs_add_fscache_stats(page->mapping->host, - NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED); } /* @@ -343,19 +343,19 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, case 0: /* read BIO submitted (page in fscache) */ dfprintk(FSCACHE, "NFS: readpage_from_fscache: BIO submitted\n"); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK); return ret; case -ENOBUFS: /* inode not in cache */ case -ENODATA: /* page not in cache */ - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); return 1; default: dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); } return ret; } @@ -429,11 +429,11 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) if (ret != 0) { fscache_uncache_page(nfs_i_fscache(inode), page); - nfs_add_fscache_stats(inode, - NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED); } else { - nfs_add_fscache_stats(inode, - NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1); + nfs_inc_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_OK); } } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 00689a8a85e..4bffe637ea3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -192,6 +192,7 @@ void nfs_zap_caches(struct inode *inode) nfs_zap_caches_locked(inode); spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_zap_caches); void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) { @@ -1149,7 +1150,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) - && nfsi->npages == 0) { + && nfsi->nrequests == 0) { i_size_write(inode, nfs_size_to_loff_t(fattr->size)); ret |= NFS_INO_INVALID_ATTR; } @@ -1192,7 +1193,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize && nfsi->npages == 0) + if (cur_size != new_isize && nfsi->nrequests == 0) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; } @@ -1619,7 +1620,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (new_isize != cur_isize) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ - if ((nfsi->npages == 0) || new_isize > cur_isize) { + if ((nfsi->nrequests == 0) || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; invalid &= ~NFS_INO_REVAL_PAGECACHE; @@ -1784,7 +1785,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_LIST_HEAD(&nfsi->commit_info.list); - nfsi->npages = 0; + nfsi->nrequests = 0; nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->commit_info.rpcs_out, 0); atomic_set(&nfsi->silly_count, 1); diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index c5832487c45..0cb806fbd4c 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -55,6 +55,11 @@ static inline void nfs_add_fscache_stats(struct inode *inode, { this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend); } +static inline void nfs_inc_fscache_stats(struct inode *inode, + enum nfs_stat_fscachecounters stat) +{ + this_cpu_inc(NFS_SERVER(inode)->io_stats->fscache[stat]); +} #endif static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index d10333a197b..7afb8947dfd 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -6,6 +6,8 @@ #define __LINUX_FS_NFS_NFS4_2_H /* nfs4.2proc.c */ +int nfs42_proc_allocate(struct file *, loff_t, loff_t); +int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); /* nfs4.2xdr.h */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 0886f1db591..cb170722769 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -32,6 +32,81 @@ static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, return ret; } +static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + loff_t offset, loff_t len) +{ + struct inode *inode = file_inode(filep); + struct nfs42_falloc_args args = { + .falloc_fh = NFS_FH(inode), + .falloc_offset = offset, + .falloc_length = len, + }; + struct nfs42_falloc_res res; + struct nfs_server *server = NFS_SERVER(inode); + int status; + + msg->rpc_argp = &args; + msg->rpc_resp = &res; + + status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE); + if (status) + return status; + + return nfs4_call_sync(server->client, server, msg, + &args.seq_args, &res.seq_res, 0); +} + +static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + loff_t offset, loff_t len) +{ + struct nfs_server *server = NFS_SERVER(file_inode(filep)); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs42_proc_fallocate(msg, filep, offset, len); + if (err == -ENOTSUPP) + return -EOPNOTSUPP; + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + + return err; +} + +int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE], + }; + struct inode *inode = file_inode(filep); + int err; + + if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE)) + return -EOPNOTSUPP; + + err = nfs42_proc_fallocate(&msg, filep, offset, len); + if (err == -EOPNOTSUPP) + NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE; + return err; +} + +int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DEALLOCATE], + }; + struct inode *inode = file_inode(filep); + int err; + + if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE)) + return -EOPNOTSUPP; + + err = nfs42_proc_fallocate(&msg, filep, offset, len); + if (err == -EOPNOTSUPP) + NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE; + return err; +} + loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) { struct inode *inode = file_inode(filep); @@ -50,7 +125,7 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) struct nfs_server *server = NFS_SERVER(inode); int status; - if (!(server->caps & NFS_CAP_SEEK)) + if (!nfs_server_capable(inode, NFS_CAP_SEEK)) return -ENOTSUPP; status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ); diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index c90469b604b..038a7e1521f 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -4,6 +4,15 @@ #ifndef __LINUX_FS_NFS_NFS4_2XDR_H #define __LINUX_FS_NFS_NFS4_2XDR_H +#define encode_fallocate_maxsz (encode_stateid_maxsz + \ + 2 /* offset */ + \ + 2 /* length */) +#define encode_allocate_maxsz (op_encode_hdr_maxsz + \ + encode_fallocate_maxsz) +#define decode_allocate_maxsz (op_decode_hdr_maxsz) +#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ + encode_fallocate_maxsz) +#define decode_deallocate_maxsz (op_decode_hdr_maxsz) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ @@ -14,6 +23,18 @@ 2 /* offset */ + \ 2 /* length */) +#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_allocate_maxsz) +#define NFS4_dec_allocate_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_allocate_maxsz) +#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_deallocate_maxsz) +#define NFS4_dec_deallocate_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_deallocate_maxsz) #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_seek_maxsz) @@ -22,6 +43,30 @@ decode_seek_maxsz) +static void encode_fallocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + encode_nfs4_stateid(xdr, &args->falloc_stateid); + encode_uint64(xdr, args->falloc_offset); + encode_uint64(xdr, args->falloc_length); +} + +static void encode_allocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_ALLOCATE, decode_allocate_maxsz, hdr); + encode_fallocate(xdr, args); +} + +static void encode_deallocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_DEALLOCATE, decode_deallocate_maxsz, hdr); + encode_fallocate(xdr, args); +} + static void encode_seek(struct xdr_stream *xdr, struct nfs42_seek_args *args, struct compound_hdr *hdr) @@ -33,6 +78,42 @@ static void encode_seek(struct xdr_stream *xdr, } /* + * Encode ALLOCATE request + */ +static void nfs4_xdr_enc_allocate(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->falloc_fh, &hdr); + encode_allocate(xdr, args, &hdr); + encode_nops(&hdr); +} + +/* + * Encode DEALLOCATE request + */ +static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->falloc_fh, &hdr); + encode_deallocate(xdr, args, &hdr); + encode_nops(&hdr); +} + +/* * Encode SEEK request */ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, @@ -50,6 +131,16 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, encode_nops(&hdr); } +static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) +{ + return decode_op_hdr(xdr, OP_ALLOCATE); +} + +static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) +{ + return decode_op_hdr(xdr, OP_DEALLOCATE); +} + static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) { int status; @@ -73,6 +164,54 @@ out_overflow: } /* + * Decode ALLOCATE request + */ +static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_falloc_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_allocate(xdr, res); +out: + return status; +} + +/* + * Decode DEALLOCATE request + */ +static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_falloc_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_deallocate(xdr, res); +out: + return status; +} + +/* * Decode SEEK request */ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index be6cac37ea1..a08178764cf 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -226,6 +226,7 @@ int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); /* nfs4proc.c */ +extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index ffdb28d86cf..03311259b0c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -241,28 +241,25 @@ void nfs4_free_client(struct nfs_client *clp) */ static int nfs4_init_callback(struct nfs_client *clp) { + struct rpc_xprt *xprt; int error; - if (clp->rpc_ops->version == 4) { - struct rpc_xprt *xprt; + xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - - if (nfs4_has_session(clp)) { - error = xprt_setup_backchannel(xprt, - NFS41_BC_MIN_CALLBACKS); - if (error < 0) - return error; - } - - error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); - if (error < 0) { - dprintk("%s: failed to start callback. Error = %d\n", - __func__, error); + if (nfs4_has_session(clp)) { + error = xprt_setup_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); + if (error < 0) return error; - } - __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); } + + error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); + if (error < 0) { + dprintk("%s: failed to start callback. Error = %d\n", + __func__, error); + return error; + } + __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + return 0; } @@ -498,8 +495,7 @@ int nfs40_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs_wait_client_init_complete(pos); @@ -517,8 +513,7 @@ int nfs40_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs4_proc_setclientid_confirm(pos, &clid, cred); @@ -549,8 +544,7 @@ int nfs40_walk_client_list(struct nfs_client *new, /* No match found. The server lost our clientid */ out: - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); dprintk("NFS: <-- %s status = %d\n", __func__, status); return status; } @@ -641,8 +635,7 @@ int nfs41_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs_wait_client_init_complete(pos); @@ -675,8 +668,7 @@ int nfs41_walk_client_list(struct nfs_client *new, /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s status = %d\n", __func__, status); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); return status; } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index c51fb4db9bf..8b46389c4c5 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -3,6 +3,8 @@ * * Copyright (C) 1992 Rick Sladkey */ +#include <linux/fs.h> +#include <linux/falloc.h> #include <linux/nfs_fs.h> #include "internal.h" #include "fscache.h" @@ -134,6 +136,32 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) return nfs_file_llseek(filep, offset, whence); } } + +static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t len) +{ + struct inode *inode = file_inode(filep); + long ret; + + if (!S_ISREG(inode->i_mode)) + return -EOPNOTSUPP; + + if ((mode != 0) && (mode != (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE))) + return -EOPNOTSUPP; + + ret = inode_newsize_ok(inode, offset + len); + if (ret < 0) + return ret; + + mutex_lock(&inode->i_mutex); + if (mode & FALLOC_FL_PUNCH_HOLE) + ret = nfs42_proc_deallocate(filep, offset, len); + else + ret = nfs42_proc_allocate(filep, offset, len); + mutex_unlock(&inode->i_mutex); + + nfs_zap_caches(inode); + return ret; +} #endif /* CONFIG_NFS_V4_2 */ const struct file_operations nfs4_file_operations = { @@ -155,6 +183,9 @@ const struct file_operations nfs4_file_operations = { .flock = nfs_flock, .splice_read = nfs_file_splice_read, .splice_write = iter_file_splice_write, +#ifdef CONFIG_NFS_V4_2 + .fallocate = nfs42_fallocate, +#endif /* CONFIG_NFS_V4_2 */ .check_flags = nfs_check_flags, .setlease = simple_nosetlease, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 69dc20a743f..e7f8d5ff258 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -158,8 +158,6 @@ static int nfs4_map_errors(int err) return -EACCES; case -NFS4ERR_MINOR_VERS_MISMATCH: return -EPROTONOSUPPORT; - case -NFS4ERR_ACCESS: - return -EACCES; case -NFS4ERR_FILE_OPEN: return -EBUSY; default: @@ -344,7 +342,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; @@ -7704,6 +7702,9 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) dprintk("--> %s\n", __func__); + /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ + pnfs_get_layout_hdr(NFS_I(inode)->layout); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); @@ -7716,9 +7717,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->res.seq_res.sr_slot = NULL; nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); - /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ - pnfs_get_layout_hdr(NFS_I(inode)->layout); - task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return ERR_CAST(task); @@ -8426,6 +8424,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 + | NFS_CAP_ALLOCATE + | NFS_CAP_DEALLOCATE | NFS_CAP_SEEK, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 206c08a60c7..cb4376b78ed 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -141,13 +141,15 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \ 1 /* sc_prog */ + \ - XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ - XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ 1) /* sc_cb_ident */ #define decode_setclientid_maxsz \ (op_decode_hdr_maxsz + \ - 2 + \ - 1024) /* large value for CLID_INUSE */ + 2 /* clientid */ + \ + XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) #define encode_setclientid_confirm_maxsz \ (op_encode_hdr_maxsz + \ 3 + (NFS4_VERIFIER_SIZE >> 2)) @@ -7394,6 +7396,8 @@ struct rpc_procinfo nfs4_procedures[] = { #endif /* CONFIG_NFS_V4_1 */ #ifdef CONFIG_NFS_V4_2 PROC(SEEK, enc_seek, dec_seek), + PROC(ALLOCATE, enc_allocate, dec_allocate), + PROC(DEALLOCATE, enc_deallocate, dec_deallocate), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ed0db61f854..2b5e769beb1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -258,6 +258,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) static inline void nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) { + struct inode *inode; WARN_ON_ONCE(prev == req); if (!prev) { @@ -276,12 +277,16 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) * nfs_page_group_destroy is called */ kref_get(&req->wb_head->wb_kref); - /* grab extra ref if head request has extra ref from - * the write/commit path to handle handoff between write - * and commit lists */ + /* grab extra ref and bump the request count if head request + * has extra ref from the write/commit path to handle handoff + * between write and commit lists. */ if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { + inode = page_file_mapping(req->wb_page)->host; set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); + spin_lock(&inode->i_lock); + NFS_I(inode)->nrequests++; + spin_unlock(&inode->i_lock); } } } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index beff2769c5c..c91a4799c56 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -269,7 +269,7 @@ int nfs_readpage(struct file *file, struct page *page) dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", page, PAGE_CACHE_SIZE, page_file_index(page)); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); - nfs_add_stats(inode, NFSIOS_READPAGES, 1); + nfs_inc_stats(inode, NFSIOS_READPAGES); /* * Try to flush any pending writes to the file.. diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f83b02dc916..af3af685a9e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -575,7 +575,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st int ret; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); - nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); + nfs_inc_stats(inode, NFSIOS_WRITEPAGES); nfs_pageio_cond_complete(pgio, page_file_index(page)); ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); @@ -670,7 +670,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfs_lock_request(req); spin_lock(&inode->i_lock); - if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + if (!nfsi->nrequests && + NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; /* * Swap-space should not get truncated. Hence no need to plug the race @@ -681,9 +682,11 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); } - nfsi->npages++; + nfsi->nrequests++; /* this a head request for a page group - mark it as having an - * extra reference so sub groups can follow suit */ + * extra reference so sub groups can follow suit. + * This flag also informs pgio layer when to bump nrequests when + * adding subrequests. */ WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); @@ -709,7 +712,11 @@ static void nfs_inode_remove_request(struct nfs_page *req) wake_up_page(head->wb_page, PG_private); clear_bit(PG_MAPPED, &head->wb_flags); } - nfsi->npages--; + nfsi->nrequests--; + spin_unlock(&inode->i_lock); + } else { + spin_lock(&inode->i_lock); + nfsi->nrequests--; spin_unlock(&inode->i_lock); } @@ -1735,7 +1742,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr /* Don't commit yet if this is a non-blocking flush and there * are a lot of outstanding writes for this mapping. */ - if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1)) + if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1)) goto out_mark_dirty; /* don't wait for the COMMIT response */ diff --git a/include/linux/lockd/debug.h b/include/linux/lockd/debug.h index 257d3779f2a..0ca8109934e 100644 --- a/include/linux/lockd/debug.h +++ b/include/linux/lockd/debug.h @@ -17,12 +17,8 @@ * Enable lockd debugging. * Requires RPC_DEBUG. */ -#ifdef RPC_DEBUG -# define LOCKD_DEBUG 1 -#endif - #undef ifdebug -#if defined(RPC_DEBUG) && defined(LOCKD_DEBUG) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define ifdebug(flag) if (unlikely(nlm_debug & NLMDBG_##flag)) #else # define ifdebug(flag) if (0) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 356acc2846f..022b761dbf0 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -490,6 +490,8 @@ enum { /* nfs42 */ NFSPROC4_CLNT_SEEK, + NFSPROC4_CLNT_ALLOCATE, + NFSPROC4_CLNT_DEALLOCATE, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c72d1ad41ad..6d627b92df5 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -163,7 +163,7 @@ struct nfs_inode { */ __be32 cookieverf[2]; - unsigned long npages; + unsigned long nrequests; struct nfs_mds_commit_info commit_info; /* Open contexts for shared mmap writes */ @@ -520,7 +520,7 @@ extern void nfs_commit_free(struct nfs_commit_data *data); static inline int nfs_have_writebacks(struct inode *inode) { - return NFS_I(inode)->npages != 0; + return NFS_I(inode)->nrequests != 0; } /* diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a32ba0d7a98..1e37fbb78f7 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -231,5 +231,7 @@ struct nfs_server { #define NFS_CAP_ATOMIC_OPEN_V1 (1U << 17) #define NFS_CAP_SECURITY_LABEL (1U << 18) #define NFS_CAP_SEEK (1U << 19) +#define NFS_CAP_ALLOCATE (1U << 20) +#define NFS_CAP_DEALLOCATE (1U << 21) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 47ebb4fafd8..467c84efb59 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1243,6 +1243,20 @@ nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) #endif /* CONFIG_NFS_V4_1 */ #ifdef CONFIG_NFS_V4_2 +struct nfs42_falloc_args { + struct nfs4_sequence_args seq_args; + + struct nfs_fh *falloc_fh; + nfs4_stateid falloc_stateid; + u64 falloc_offset; + u64 falloc_length; +}; + +struct nfs42_falloc_res { + struct nfs4_sequence_res seq_res; + unsigned int status; +}; + struct nfs42_seek_args { struct nfs4_sequence_args seq_args; diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 8e030075fe7..a7cbb570cc5 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -53,7 +53,7 @@ struct rpc_cred { struct rcu_head cr_rcu; struct rpc_auth * cr_auth; const struct rpc_credops *cr_ops; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) unsigned long cr_magic; /* 0x0f4aa4f0 */ #endif unsigned long cr_expire; /* when to gc */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 70736b98c72..d86acc63b25 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -63,6 +63,9 @@ struct rpc_clnt { struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; const struct rpc_program *cl_program; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + struct dentry *cl_debugfs; /* debugfs directory */ +#endif }; /* @@ -176,5 +179,6 @@ size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); int rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t); +const char *rpc_proc_name(const struct rpc_task *task); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 9385bd74c86..c57d8ea0716 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -10,22 +10,10 @@ #include <uapi/linux/sunrpc/debug.h> - -/* - * Enable RPC debugging/profiling. - */ -#ifdef CONFIG_SUNRPC_DEBUG -#define RPC_DEBUG -#endif -#ifdef CONFIG_TRACEPOINTS -#define RPC_TRACEPOINTS -#endif -/* #define RPC_PROFILE */ - /* * Debugging macros etc */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) extern unsigned int rpc_debug; extern unsigned int nfs_debug; extern unsigned int nfsd_debug; @@ -36,7 +24,7 @@ extern unsigned int nlm_debug; #define dprintk_rcu(args...) dfprintk_rcu(FACILITY, ## args) #undef ifdebug -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac)) # define dfprintk(fac, args...) \ @@ -65,9 +53,55 @@ extern unsigned int nlm_debug; /* * Sysctl interface for RPC debugging */ -#ifdef RPC_DEBUG + +struct rpc_clnt; +struct rpc_xprt; + +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) void rpc_register_sysctl(void); void rpc_unregister_sysctl(void); +int sunrpc_debugfs_init(void); +void sunrpc_debugfs_exit(void); +int rpc_clnt_debugfs_register(struct rpc_clnt *); +void rpc_clnt_debugfs_unregister(struct rpc_clnt *); +int rpc_xprt_debugfs_register(struct rpc_xprt *); +void rpc_xprt_debugfs_unregister(struct rpc_xprt *); +#else +static inline int +sunrpc_debugfs_init(void) +{ + return 0; +} + +static inline void +sunrpc_debugfs_exit(void) +{ + return; +} + +static inline int +rpc_clnt_debugfs_register(struct rpc_clnt *clnt) +{ + return 0; +} + +static inline void +rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt) +{ + return; +} + +static inline int +rpc_xprt_debugfs_register(struct rpc_xprt *xprt) +{ + return 0; +} + +static inline void +rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt) +{ + return; +} #endif #endif /* _LINUX_SUNRPC_DEBUG_H_ */ diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h index 1565bbe86d5..eecb5a71e6c 100644 --- a/include/linux/sunrpc/metrics.h +++ b/include/linux/sunrpc/metrics.h @@ -27,10 +27,13 @@ #include <linux/seq_file.h> #include <linux/ktime.h> +#include <linux/spinlock.h> #define RPC_IOSTATS_VERS "1.0" struct rpc_iostats { + spinlock_t om_lock; + /* * These counters give an idea about how many request * transmissions are required, on average, to complete that diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 1a8959944c5..5f1e6bd4c31 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -79,7 +79,7 @@ struct rpc_task { unsigned short tk_flags; /* misc flags */ unsigned short tk_timeouts; /* maj timeouts */ -#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) unsigned short tk_pid; /* debugging aid */ #endif unsigned char tk_priority : 2,/* Task priority */ @@ -187,7 +187,7 @@ struct rpc_wait_queue { unsigned char nr; /* # tasks remaining for cookie */ unsigned short qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; -#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) const char * name; #endif }; @@ -237,7 +237,7 @@ void rpc_free(void *); int rpciod_up(void); void rpciod_down(void); int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct net; void rpc_show_tasks(struct net *); #endif @@ -251,7 +251,7 @@ static inline int rpc_wait_for_completion_task(struct rpc_task *task) return __rpc_wait_for_completion_task(task, NULL); } -#if defined(RPC_DEBUG) || defined (RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) static inline const char * rpc_qname(const struct rpc_wait_queue *q) { return ((q && q->name) ? q->name : "unknown"); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index cf391eef2e6..9d27ac45b90 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -239,6 +239,9 @@ struct rpc_xprt { struct net *xprt_net; const char *servername; const char *address_strings[RPC_DISPLAY_MAX]; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + struct dentry *debugfs; /* debugfs directory */ +#endif }; #if defined(CONFIG_SUNRPC_BACKCHANNEL) diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 1ad36cc25b2..7591788e9fb 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -17,6 +17,65 @@ void cleanup_socket_xprt(void); #define RPC_DEF_MIN_RESVPORT (665U) #define RPC_DEF_MAX_RESVPORT (1023U) +struct sock_xprt { + struct rpc_xprt xprt; + + /* + * Network layer + */ + struct socket * sock; + struct sock * inet; + + /* + * State of TCP reply receive + */ + __be32 tcp_fraghdr, + tcp_xid, + tcp_calldir; + + u32 tcp_offset, + tcp_reclen; + + unsigned long tcp_copied, + tcp_flags; + + /* + * Connection of transports + */ + struct delayed_work connect_worker; + struct sockaddr_storage srcaddr; + unsigned short srcport; + + /* + * UDP socket buffer size parameters + */ + size_t rcvsize, + sndsize; + + /* + * Saved socket callback addresses + */ + void (*old_data_ready)(struct sock *); + void (*old_state_change)(struct sock *); + void (*old_write_space)(struct sock *); + void (*old_error_report)(struct sock *); +}; + +/* + * TCP receive state flags + */ +#define TCP_RCV_LAST_FRAG (1UL << 0) +#define TCP_RCV_COPY_FRAGHDR (1UL << 1) +#define TCP_RCV_COPY_XID (1UL << 2) +#define TCP_RCV_COPY_DATA (1UL << 3) +#define TCP_RCV_READ_CALLDIR (1UL << 4) +#define TCP_RCV_COPY_CALLDIR (1UL << 5) + +/* + * TCP RPC flags + */ +#define TCP_RPC_REPLY (1UL << 6) + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 1fef3e6e943..171ca4ff6d9 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -6,6 +6,8 @@ #include <linux/sunrpc/sched.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/xprtsock.h> #include <net/tcp_states.h> #include <linux/net.h> #include <linux/tracepoint.h> @@ -306,6 +308,164 @@ DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection); DEFINE_RPC_SOCKET_EVENT(rpc_socket_close); DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown); +DECLARE_EVENT_CLASS(rpc_xprt_event, + TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), + + TP_ARGS(xprt, xid, status), + + TP_STRUCT__entry( + __field(__be32, xid) + __field(int, status) + __string(addr, xprt->address_strings[RPC_DISPLAY_ADDR]) + __string(port, xprt->address_strings[RPC_DISPLAY_PORT]) + ), + + TP_fast_assign( + __entry->xid = xid; + __entry->status = status; + __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]); + __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); + ), + + TP_printk("peer=[%s]:%s xid=0x%x status=%d", __get_str(addr), + __get_str(port), be32_to_cpu(__entry->xid), + __entry->status) +); + +DEFINE_EVENT(rpc_xprt_event, xprt_lookup_rqst, + TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), + TP_ARGS(xprt, xid, status)); + +DEFINE_EVENT(rpc_xprt_event, xprt_transmit, + TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), + TP_ARGS(xprt, xid, status)); + +DEFINE_EVENT(rpc_xprt_event, xprt_complete_rqst, + TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), + TP_ARGS(xprt, xid, status)); + +TRACE_EVENT(xs_tcp_data_ready, + TP_PROTO(struct rpc_xprt *xprt, int err, unsigned int total), + + TP_ARGS(xprt, err, total), + + TP_STRUCT__entry( + __field(int, err) + __field(unsigned int, total) + __string(addr, xprt ? xprt->address_strings[RPC_DISPLAY_ADDR] : + "(null)") + __string(port, xprt ? xprt->address_strings[RPC_DISPLAY_PORT] : + "(null)") + ), + + TP_fast_assign( + __entry->err = err; + __entry->total = total; + __assign_str(addr, xprt ? + xprt->address_strings[RPC_DISPLAY_ADDR] : "(null)"); + __assign_str(port, xprt ? + xprt->address_strings[RPC_DISPLAY_PORT] : "(null)"); + ), + + TP_printk("peer=[%s]:%s err=%d total=%u", __get_str(addr), + __get_str(port), __entry->err, __entry->total) +); + +#define rpc_show_sock_xprt_flags(flags) \ + __print_flags(flags, "|", \ + { TCP_RCV_LAST_FRAG, "TCP_RCV_LAST_FRAG" }, \ + { TCP_RCV_COPY_FRAGHDR, "TCP_RCV_COPY_FRAGHDR" }, \ + { TCP_RCV_COPY_XID, "TCP_RCV_COPY_XID" }, \ + { TCP_RCV_COPY_DATA, "TCP_RCV_COPY_DATA" }, \ + { TCP_RCV_READ_CALLDIR, "TCP_RCV_READ_CALLDIR" }, \ + { TCP_RCV_COPY_CALLDIR, "TCP_RCV_COPY_CALLDIR" }, \ + { TCP_RPC_REPLY, "TCP_RPC_REPLY" }) + +TRACE_EVENT(xs_tcp_data_recv, + TP_PROTO(struct sock_xprt *xs), + + TP_ARGS(xs), + + TP_STRUCT__entry( + __string(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]) + __string(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]) + __field(__be32, xid) + __field(unsigned long, flags) + __field(unsigned long, copied) + __field(unsigned int, reclen) + __field(unsigned long, offset) + ), + + TP_fast_assign( + __assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]); + __assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]); + __entry->xid = xs->tcp_xid; + __entry->flags = xs->tcp_flags; + __entry->copied = xs->tcp_copied; + __entry->reclen = xs->tcp_reclen; + __entry->offset = xs->tcp_offset; + ), + + TP_printk("peer=[%s]:%s xid=0x%x flags=%s copied=%lu reclen=%u offset=%lu", + __get_str(addr), __get_str(port), be32_to_cpu(__entry->xid), + rpc_show_sock_xprt_flags(__entry->flags), + __entry->copied, __entry->reclen, __entry->offset) +); + +TRACE_EVENT(svc_recv, + TP_PROTO(struct svc_rqst *rqst, int status), + + TP_ARGS(rqst, status), + + TP_STRUCT__entry( + __field(struct sockaddr *, addr) + __field(__be32, xid) + __field(int, status) + ), + + TP_fast_assign( + __entry->addr = (struct sockaddr *)&rqst->rq_addr; + __entry->xid = status > 0 ? rqst->rq_xid : 0; + __entry->status = status; + ), + + TP_printk("addr=%pIScp xid=0x%x status=%d", __entry->addr, + be32_to_cpu(__entry->xid), __entry->status) +); + +DECLARE_EVENT_CLASS(svc_rqst_status, + + TP_PROTO(struct svc_rqst *rqst, int status), + + TP_ARGS(rqst, status), + + TP_STRUCT__entry( + __field(struct sockaddr *, addr) + __field(__be32, xid) + __field(int, dropme) + __field(int, status) + ), + + TP_fast_assign( + __entry->addr = (struct sockaddr *)&rqst->rq_addr; + __entry->xid = rqst->rq_xid; + __entry->dropme = (int)rqst->rq_dropme; + __entry->status = status; + ), + + TP_printk("addr=%pIScp rq_xid=0x%x dropme=%d status=%d", + __entry->addr, be32_to_cpu(__entry->xid), __entry->dropme, + __entry->status) +); + +DEFINE_EVENT(svc_rqst_status, svc_process, + TP_PROTO(struct svc_rqst *rqst, int status), + TP_ARGS(rqst, status)); + +DEFINE_EVENT(svc_rqst_status, svc_send, + TP_PROTO(struct svc_rqst *rqst, int status), + TP_ARGS(rqst, status)); + #endif /* _TRACE_SUNRPC_H */ #include <trace/define_trace.h> diff --git a/include/uapi/linux/nfsd/debug.h b/include/uapi/linux/nfsd/debug.h index a6f453c740b..1fdc95bb237 100644 --- a/include/uapi/linux/nfsd/debug.h +++ b/include/uapi/linux/nfsd/debug.h @@ -15,7 +15,7 @@ * Enable debugging for nfsd. * Requires RPC_DEBUG. */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define NFSD_DEBUG 1 #endif diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 0754d0f466d..fb78117b896 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -35,6 +35,7 @@ config RPCSEC_GSS_KRB5 config SUNRPC_DEBUG bool "RPC: Enable dprintk debugging" depends on SUNRPC && SYSCTL + select DEBUG_FS help This option enables a sysctl-based debugging interface that is be used by the 'rpcdebug' utility to turn on or off diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index e5a7a1cac8f..15e6f6c23c5 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -14,6 +14,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o +sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 383eb919ac0..47f38be4155 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -16,7 +16,7 @@ #include <linux/sunrpc/gss_api.h> #include <linux/spinlock.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -646,7 +646,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, cred->cr_auth = auth; cred->cr_ops = ops; cred->cr_expire = jiffies; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) cred->cr_magic = RPCAUTH_CRED_MAGIC; #endif cred->cr_uid = acred->uid; diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 6f6b829c9e8..41248b1820c 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -14,7 +14,7 @@ #include <linux/sunrpc/debug.h> #include <linux/sunrpc/sched.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 53ed8d3f889..dace13d7638 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -66,7 +66,7 @@ static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED; #define GSS_KEY_EXPIRE_TIMEO 240 static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index c586e92bcf7..254defe446a 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -38,7 +38,7 @@ #include <linux/sunrpc/gss_asn1.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index f5ed9f6ece0..b5408e8a37f 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -45,7 +45,7 @@ #include <linux/sunrpc/gss_krb5.h> #include <linux/sunrpc/xdr.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 24589bd2a4b..234fa8d0fd9 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -61,7 +61,7 @@ #include <linux/sunrpc/xdr.h> #include <linux/lcm.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 0d3c158ef8f..28db442a003 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -45,7 +45,7 @@ #include <linux/crypto.h> #include <linux/sunrpc/gss_krb5_enctypes.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 42768e5c399..1d74d653e6c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -64,7 +64,7 @@ #include <linux/random.h> #include <linux/crypto.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index 62ac90c62cb..20d55c793eb 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -35,7 +35,7 @@ #include <linux/sunrpc/gss_krb5.h> #include <linux/crypto.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 6c981ddc19f..dcf9515d9ae 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -62,7 +62,7 @@ #include <linux/sunrpc/gss_krb5.h> #include <linux/crypto.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 4b614c604fe..ca7e92a32f8 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -35,7 +35,7 @@ #include <linux/pagemap.h> #include <linux/crypto.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 92d5ab99fbf..7063d856a59 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -46,7 +46,7 @@ #include <linux/sunrpc/gss_api.h> #include <linux/sunrpc/clnt.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h index 685a688f3d8..9d88c6239f0 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.h +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h @@ -25,7 +25,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/xprtsock.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index c548ab213f7..de856ddf5fe 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -51,7 +51,7 @@ #include "gss_rpc_upcall.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 712c123e04e..c2a2b584a05 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -10,7 +10,7 @@ #include <linux/module.h> #include <linux/sunrpc/clnt.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -138,7 +138,7 @@ struct rpc_cred null_cred = { .cr_ops = &null_credops, .cr_count = ATOMIC_INIT(1), .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE, -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) .cr_magic = RPCAUTH_CRED_MAGIC, #endif }; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index d5d69236629..4feda2d0a83 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -25,7 +25,7 @@ struct unx_cred { #define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2)) -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 9761a0da964..651f49ab601 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -27,7 +27,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <linux/export.h> #include <linux/sunrpc/bc_xprt.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #define RPCDBG_FACILITY RPCDBG_TRANS #endif diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9acd6ce88db..05da12a3394 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -42,7 +42,7 @@ #include "sunrpc.h" #include "netns.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_CALL #endif @@ -305,6 +305,10 @@ static int rpc_client_register(struct rpc_clnt *clnt, struct super_block *pipefs_sb; int err; + err = rpc_clnt_debugfs_register(clnt); + if (err) + return err; + pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { err = rpc_setup_pipedir(pipefs_sb, clnt); @@ -331,6 +335,7 @@ err_auth: out: if (pipefs_sb) rpc_put_sb_net(net); + rpc_clnt_debugfs_unregister(clnt); return err; } @@ -670,6 +675,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, rpc_unregister_client(clnt); __rpc_clnt_remove_pipedir(clnt); + rpc_clnt_debugfs_unregister(clnt); /* * A new transport was created. "clnt" therefore @@ -771,6 +777,7 @@ rpc_free_client(struct rpc_clnt *clnt) rcu_dereference(clnt->cl_xprt)->servername); if (clnt->cl_parent != clnt) parent = clnt->cl_parent; + rpc_clnt_debugfs_unregister(clnt); rpc_clnt_remove_pipedir(clnt); rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); @@ -1396,8 +1403,9 @@ rpc_restart_call(struct rpc_task *task) } EXPORT_SYMBOL_GPL(rpc_restart_call); -#ifdef RPC_DEBUG -static const char *rpc_proc_name(const struct rpc_task *task) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +const char +*rpc_proc_name(const struct rpc_task *task) { const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; @@ -2421,7 +2429,7 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int } EXPORT_SYMBOL_GPL(rpc_call_null); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static void rpc_show_header(void) { printk(KERN_INFO "-pid- flgs status -client- --rqstp- " diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c new file mode 100644 index 00000000000..e811f390f9f --- /dev/null +++ b/net/sunrpc/debugfs.c @@ -0,0 +1,292 @@ +/** + * debugfs interface for sunrpc + * + * (c) 2014 Jeff Layton <jlayton@primarydata.com> + */ + +#include <linux/debugfs.h> +#include <linux/sunrpc/sched.h> +#include <linux/sunrpc/clnt.h> +#include "netns.h" + +static struct dentry *topdir; +static struct dentry *rpc_clnt_dir; +static struct dentry *rpc_xprt_dir; + +struct rpc_clnt_iter { + struct rpc_clnt *clnt; + loff_t pos; +}; + +static int +tasks_show(struct seq_file *f, void *v) +{ + u32 xid = 0; + struct rpc_task *task = v; + struct rpc_clnt *clnt = task->tk_client; + const char *rpc_waitq = "none"; + + if (RPC_IS_QUEUED(task)) + rpc_waitq = rpc_qname(task->tk_waitqueue); + + if (task->tk_rqstp) + xid = be32_to_cpu(task->tk_rqstp->rq_xid); + + seq_printf(f, "%5u %04x %6d 0x%x 0x%x %8ld %ps %sv%u %s a:%ps q:%s\n", + task->tk_pid, task->tk_flags, task->tk_status, + clnt->cl_clid, xid, task->tk_timeout, task->tk_ops, + clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task), + task->tk_action, rpc_waitq); + return 0; +} + +static void * +tasks_start(struct seq_file *f, loff_t *ppos) + __acquires(&clnt->cl_lock) +{ + struct rpc_clnt_iter *iter = f->private; + loff_t pos = *ppos; + struct rpc_clnt *clnt = iter->clnt; + struct rpc_task *task; + + iter->pos = pos + 1; + spin_lock(&clnt->cl_lock); + list_for_each_entry(task, &clnt->cl_tasks, tk_task) + if (pos-- == 0) + return task; + return NULL; +} + +static void * +tasks_next(struct seq_file *f, void *v, loff_t *pos) +{ + struct rpc_clnt_iter *iter = f->private; + struct rpc_clnt *clnt = iter->clnt; + struct rpc_task *task = v; + struct list_head *next = task->tk_task.next; + + ++iter->pos; + ++*pos; + + /* If there's another task on list, return it */ + if (next == &clnt->cl_tasks) + return NULL; + return list_entry(next, struct rpc_task, tk_task); +} + +static void +tasks_stop(struct seq_file *f, void *v) + __releases(&clnt->cl_lock) +{ + struct rpc_clnt_iter *iter = f->private; + struct rpc_clnt *clnt = iter->clnt; + + spin_unlock(&clnt->cl_lock); +} + +static const struct seq_operations tasks_seq_operations = { + .start = tasks_start, + .next = tasks_next, + .stop = tasks_stop, + .show = tasks_show, +}; + +static int tasks_open(struct inode *inode, struct file *filp) +{ + int ret = seq_open_private(filp, &tasks_seq_operations, + sizeof(struct rpc_clnt_iter)); + + if (!ret) { + struct seq_file *seq = filp->private_data; + struct rpc_clnt_iter *iter = seq->private; + + iter->clnt = inode->i_private; + + if (!atomic_inc_not_zero(&iter->clnt->cl_count)) { + seq_release_private(inode, filp); + ret = -EINVAL; + } + } + + return ret; +} + +static int +tasks_release(struct inode *inode, struct file *filp) +{ + struct seq_file *seq = filp->private_data; + struct rpc_clnt_iter *iter = seq->private; + + rpc_release_client(iter->clnt); + return seq_release_private(inode, filp); +} + +static const struct file_operations tasks_fops = { + .owner = THIS_MODULE, + .open = tasks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tasks_release, +}; + +int +rpc_clnt_debugfs_register(struct rpc_clnt *clnt) +{ + int len, err; + char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */ + + /* Already registered? */ + if (clnt->cl_debugfs) + return 0; + + len = snprintf(name, sizeof(name), "%x", clnt->cl_clid); + if (len >= sizeof(name)) + return -EINVAL; + + /* make the per-client dir */ + clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir); + if (!clnt->cl_debugfs) + return -ENOMEM; + + /* make tasks file */ + err = -ENOMEM; + if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs, + clnt, &tasks_fops)) + goto out_err; + + err = -EINVAL; + rcu_read_lock(); + len = snprintf(name, sizeof(name), "../../rpc_xprt/%s", + rcu_dereference(clnt->cl_xprt)->debugfs->d_name.name); + rcu_read_unlock(); + if (len >= sizeof(name)) + goto out_err; + + err = -ENOMEM; + if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name)) + goto out_err; + + return 0; +out_err: + debugfs_remove_recursive(clnt->cl_debugfs); + clnt->cl_debugfs = NULL; + return err; +} + +void +rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt) +{ + debugfs_remove_recursive(clnt->cl_debugfs); + clnt->cl_debugfs = NULL; +} + +static int +xprt_info_show(struct seq_file *f, void *v) +{ + struct rpc_xprt *xprt = f->private; + + seq_printf(f, "netid: %s\n", xprt->address_strings[RPC_DISPLAY_NETID]); + seq_printf(f, "addr: %s\n", xprt->address_strings[RPC_DISPLAY_ADDR]); + seq_printf(f, "port: %s\n", xprt->address_strings[RPC_DISPLAY_PORT]); + seq_printf(f, "state: 0x%lx\n", xprt->state); + return 0; +} + +static int +xprt_info_open(struct inode *inode, struct file *filp) +{ + int ret; + struct rpc_xprt *xprt = inode->i_private; + + ret = single_open(filp, xprt_info_show, xprt); + + if (!ret) { + if (!xprt_get(xprt)) { + single_release(inode, filp); + ret = -EINVAL; + } + } + return ret; +} + +static int +xprt_info_release(struct inode *inode, struct file *filp) +{ + struct rpc_xprt *xprt = inode->i_private; + + xprt_put(xprt); + return single_release(inode, filp); +} + +static const struct file_operations xprt_info_fops = { + .owner = THIS_MODULE, + .open = xprt_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = xprt_info_release, +}; + +int +rpc_xprt_debugfs_register(struct rpc_xprt *xprt) +{ + int len, id; + static atomic_t cur_id; + char name[9]; /* 8 hex digits + NULL term */ + + id = (unsigned int)atomic_inc_return(&cur_id); + + len = snprintf(name, sizeof(name), "%x", id); + if (len >= sizeof(name)) + return -EINVAL; + + /* make the per-client dir */ + xprt->debugfs = debugfs_create_dir(name, rpc_xprt_dir); + if (!xprt->debugfs) + return -ENOMEM; + + /* make tasks file */ + if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs, + xprt, &xprt_info_fops)) { + debugfs_remove_recursive(xprt->debugfs); + xprt->debugfs = NULL; + return -ENOMEM; + } + + return 0; +} + +void +rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt) +{ + debugfs_remove_recursive(xprt->debugfs); + xprt->debugfs = NULL; +} + +void __exit +sunrpc_debugfs_exit(void) +{ + debugfs_remove_recursive(topdir); +} + +int __init +sunrpc_debugfs_init(void) +{ + topdir = debugfs_create_dir("sunrpc", NULL); + if (!topdir) + goto out; + + rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); + if (!rpc_clnt_dir) + goto out_remove; + + rpc_xprt_dir = debugfs_create_dir("rpc_xprt", topdir); + if (!rpc_xprt_dir) + goto out_remove; + + return 0; +out_remove: + debugfs_remove_recursive(topdir); + topdir = NULL; +out: + return -ENOMEM; +} diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 1891a1022c1..05202012bcf 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -32,7 +32,7 @@ #include "netns.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_BIND #endif diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index fe3441abdbe..d20f2329eea 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -24,7 +24,7 @@ #include "sunrpc.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #define RPCDBG_FACILITY RPCDBG_SCHED #endif @@ -258,7 +258,7 @@ static int rpc_wait_bit_killable(struct wait_bit_key *key) return 0; } -#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) static void rpc_task_set_debuginfo(struct rpc_task *task) { static atomic_t rpc_pid; diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 54530490944..9711a155bc5 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -116,7 +116,15 @@ EXPORT_SYMBOL_GPL(svc_seq_show); */ struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { - return kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL); + struct rpc_iostats *stats; + int i; + + stats = kcalloc(clnt->cl_maxproc, sizeof(*stats), GFP_KERNEL); + if (stats) { + for (i = 0; i < clnt->cl_maxproc; i++) + spin_lock_init(&stats[i].om_lock); + } + return stats; } EXPORT_SYMBOL_GPL(rpc_alloc_iostats); @@ -135,20 +143,21 @@ EXPORT_SYMBOL_GPL(rpc_free_iostats); * rpc_count_iostats - tally up per-task stats * @task: completed rpc_task * @stats: array of stat structures - * - * Relies on the caller for serialization. */ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_iostats *op_metrics; - ktime_t delta; + ktime_t delta, now; if (!stats || !req) return; + now = ktime_get(); op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; + spin_lock(&op_metrics->om_lock); + op_metrics->om_ops++; op_metrics->om_ntrans += req->rq_ntrans; op_metrics->om_timeouts += task->tk_timeouts; @@ -161,8 +170,10 @@ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt); - delta = ktime_sub(ktime_get(), task->tk_start); + delta = ktime_sub(now, task->tk_start); op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta); + + spin_unlock(&op_metrics->om_lock); } EXPORT_SYMBOL_GPL(rpc_count_iostats); diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index cd30120de9e..e37fbed8795 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -97,13 +97,20 @@ init_sunrpc(void) err = register_rpc_pipefs(); if (err) goto out4; -#ifdef RPC_DEBUG + + err = sunrpc_debugfs_init(); + if (err) + goto out5; + +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_register_sysctl(); #endif svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ return 0; +out5: + unregister_rpc_pipefs(); out4: unregister_pernet_subsys(&sunrpc_net_ops); out3: @@ -120,10 +127,11 @@ cleanup_sunrpc(void) rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); + sunrpc_debugfs_exit(); unregister_rpc_pipefs(); rpc_destroy_mempool(); unregister_pernet_subsys(&sunrpc_net_ops); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_unregister_sysctl(); #endif rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index ca8a7958f4e..2783fd80c22 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -28,6 +28,8 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/bc_xprt.h> +#include <trace/events/sunrpc.h> + #define RPCDBG_FACILITY RPCDBG_SVCDSP static void svc_unregister(const struct svc_serv *serv, struct net *net); @@ -1040,7 +1042,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) /* * dprintk the given error with the address of the client that caused it. */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static __printf(2, 3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) { @@ -1314,24 +1316,25 @@ svc_process(struct svc_rqst *rqstp) rqstp->rq_res.tail[0].iov_base = NULL; rqstp->rq_res.tail[0].iov_len = 0; - rqstp->rq_xid = svc_getu32(argv); - dir = svc_getnl(argv); if (dir != 0) { /* direction != CALL */ svc_printk(rqstp, "bad direction %d, dropping request\n", dir); serv->sv_stats->rpcbadfmt++; - svc_drop(rqstp); - return 0; + goto out_drop; } /* Returns 1 for send, 0 for drop */ - if (svc_process_common(rqstp, argv, resv)) - return svc_send(rqstp); - else { - svc_drop(rqstp); - return 0; + if (likely(svc_process_common(rqstp, argv, resv))) { + int ret = svc_send(rqstp); + + trace_svc_process(rqstp, ret); + return ret; } +out_drop: + trace_svc_process(rqstp, 0); + svc_drop(rqstp); + return 0; } #if defined(CONFIG_SUNRPC_BACKCHANNEL) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c179ca2a5aa..bbb3b044b87 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -15,6 +15,7 @@ #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/xprt.h> #include <linux/module.h> +#include <trace/events/sunrpc.h> #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -773,35 +774,43 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) err = svc_alloc_arg(rqstp); if (err) - return err; + goto out; try_to_freeze(); cond_resched(); + err = -EINTR; if (signalled() || kthread_should_stop()) - return -EINTR; + goto out; xprt = svc_get_next_xprt(rqstp, timeout); - if (IS_ERR(xprt)) - return PTR_ERR(xprt); + if (IS_ERR(xprt)) { + err = PTR_ERR(xprt); + goto out; + } len = svc_handle_xprt(rqstp, xprt); /* No data, incomplete (TCP) read, or accept() */ + err = -EAGAIN; if (len <= 0) - goto out; + goto out_release; clear_bit(XPT_OLD, &xprt->xpt_flags); rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp); rqstp->rq_chandle.defer = svc_defer; + rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]); if (serv->sv_stats) serv->sv_stats->netcnt++; + trace_svc_recv(rqstp, len); return len; -out: +out_release: rqstp->rq_res.len = 0; svc_xprt_release(rqstp); - return -EAGAIN; +out: + trace_svc_recv(rqstp, err); + return err; } EXPORT_SYMBOL_GPL(svc_recv); @@ -821,12 +830,12 @@ EXPORT_SYMBOL_GPL(svc_drop); int svc_send(struct svc_rqst *rqstp) { struct svc_xprt *xprt; - int len; + int len = -EFAULT; struct xdr_buf *xb; xprt = rqstp->rq_xprt; if (!xprt) - return -EFAULT; + goto out; /* release the receive skb before sending the reply */ rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); @@ -849,7 +858,9 @@ int svc_send(struct svc_rqst *rqstp) svc_xprt_release(rqstp); if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) - return 0; + len = 0; +out: + trace_svc_send(rqstp, len); return len; } diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index c99c58e2ee6..887f0183b4c 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(nfsd_debug); unsigned int nlm_debug; EXPORT_SYMBOL_GPL(nlm_debug); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static struct ctl_table_header *sunrpc_table_header; static struct ctl_table sunrpc_table[]; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 56e4e150e80..ebbefad21a3 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -49,13 +49,15 @@ #include <linux/sunrpc/metrics.h> #include <linux/sunrpc/bc_xprt.h> +#include <trace/events/sunrpc.h> + #include "sunrpc.h" /* * Local variables */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_XPRT #endif @@ -772,11 +774,14 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) struct rpc_rqst *entry; list_for_each_entry(entry, &xprt->recv, rq_list) - if (entry->rq_xid == xid) + if (entry->rq_xid == xid) { + trace_xprt_lookup_rqst(xprt, xid, 0); return entry; + } dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n", ntohl(xid)); + trace_xprt_lookup_rqst(xprt, xid, -ENOENT); xprt->stat.bad_xids++; return NULL; } @@ -810,6 +815,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", task->tk_pid, ntohl(req->rq_xid), copied); + trace_xprt_complete_rqst(xprt, req->rq_xid, copied); xprt->stat.recvs++; req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime); @@ -926,6 +932,7 @@ void xprt_transmit(struct rpc_task *task) req->rq_xtime = ktime_get(); status = xprt->ops->send_request(task); + trace_xprt_transmit(xprt, req->rq_xid, status); if (status != 0) { task->tk_status = status; return; @@ -1296,6 +1303,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) */ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) { + int err; struct rpc_xprt *xprt; struct xprt_class *t; @@ -1336,6 +1344,12 @@ found: return ERR_PTR(-ENOMEM); } + err = rpc_xprt_debugfs_register(xprt); + if (err) { + xprt_destroy(xprt); + return ERR_PTR(err); + } + dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); out: @@ -1352,6 +1366,7 @@ static void xprt_destroy(struct rpc_xprt *xprt) dprintk("RPC: destroying transport %p\n", xprt); del_timer_sync(&xprt->timer); + rpc_xprt_debugfs_unregister(xprt); rpc_destroy_wait_queue(&xprt->binding); rpc_destroy_wait_queue(&xprt->pending); rpc_destroy_wait_queue(&xprt->sending); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 6166c985fe2..df01d124936 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -49,11 +49,11 @@ #include <linux/highmem.h> -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_TRANS #endif -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static const char transfertypes[][12] = { "pure inline", /* no chunks */ " read chunk", /* some argument via rdma read */ diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 6a4615dd026..bbd6155d3e3 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -55,7 +55,7 @@ #include "xprt_rdma.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_TRANS #endif @@ -73,9 +73,9 @@ static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_inline_write_padding; static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; - int xprt_rdma_pad_optimize = 0; + int xprt_rdma_pad_optimize = 1; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; @@ -599,7 +599,7 @@ xprt_rdma_send_request(struct rpc_task *task) if (req->rl_niovs == 0) rc = rpcrdma_marshal_req(rqst); - else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) + else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL) rc = rpcrdma_marshal_chunks(rqst, 0); if (rc < 0) goto failed_marshal; @@ -705,7 +705,7 @@ static void __exit xprt_rdma_cleanup(void) int rc; dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (sunrpc_table_header) { unregister_sysctl_table(sunrpc_table_header); sunrpc_table_header = NULL; @@ -736,7 +736,7 @@ static int __init xprt_rdma_init(void) dprintk("\tPadding %d\n\tMemreg %d\n", xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (!sunrpc_table_header) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 61c41298b4e..c98e4064391 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -57,11 +57,12 @@ * Globals/Macros */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_TRANS #endif static void rpcrdma_reset_frmrs(struct rpcrdma_ia *); +static void rpcrdma_reset_fmrs(struct rpcrdma_ia *); /* * internal functions @@ -105,13 +106,51 @@ rpcrdma_run_tasklet(unsigned long data) static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); +static const char * const async_event[] = { + "CQ error", + "QP fatal error", + "QP request error", + "QP access error", + "communication established", + "send queue drained", + "path migration successful", + "path mig error", + "device fatal error", + "port active", + "port error", + "LID change", + "P_key change", + "SM change", + "SRQ error", + "SRQ limit reached", + "last WQE reached", + "client reregister", + "GID change", +}; + +#define ASYNC_MSG(status) \ + ((status) < ARRAY_SIZE(async_event) ? \ + async_event[(status)] : "unknown async error") + +static void +rpcrdma_schedule_tasklet(struct list_head *sched_list) +{ + unsigned long flags; + + spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); + list_splice_tail(sched_list, &rpcrdma_tasklets_g); + spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + tasklet_schedule(&rpcrdma_tasklet_g); +} + static void rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) { struct rpcrdma_ep *ep = context; - dprintk("RPC: %s: QP error %X on device %s ep %p\n", - __func__, event->event, event->device->name, context); + pr_err("RPC: %s: %s on device %s ep %p\n", + __func__, ASYNC_MSG(event->event), + event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; ep->rep_func(ep); @@ -124,8 +163,9 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) { struct rpcrdma_ep *ep = context; - dprintk("RPC: %s: CQ error %X on device %s ep %p\n", - __func__, event->event, event->device->name, context); + pr_err("RPC: %s: %s on device %s ep %p\n", + __func__, ASYNC_MSG(event->event), + event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; ep->rep_func(ep); @@ -243,7 +283,6 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) struct list_head sched_list; struct ib_wc *wcs; int budget, count, rc; - unsigned long flags; INIT_LIST_HEAD(&sched_list); budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; @@ -261,10 +300,7 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) rc = 0; out_schedule: - spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); - list_splice_tail(&sched_list, &rpcrdma_tasklets_g); - spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); - tasklet_schedule(&rpcrdma_tasklet_g); + rpcrdma_schedule_tasklet(&sched_list); return rc; } @@ -309,11 +345,18 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) static void rpcrdma_flush_cqs(struct rpcrdma_ep *ep) { - rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep); - rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep); + struct ib_wc wc; + LIST_HEAD(sched_list); + + while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0) + rpcrdma_recvcq_process_wc(&wc, &sched_list); + if (!list_empty(&sched_list)) + rpcrdma_schedule_tasklet(&sched_list); + while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0) + rpcrdma_sendcq_process_wc(&wc); } -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static const char * const conn[] = { "address resolved", "address error", @@ -344,7 +387,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) struct rpcrdma_xprt *xprt = id->context; struct rpcrdma_ia *ia = &xprt->rx_ia; struct rpcrdma_ep *ep = &xprt->rx_ep; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; #endif struct ib_qp_attr attr; @@ -408,7 +451,7 @@ connected: break; } -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (connstate == 1) { int ird = attr.max_dest_rd_atomic; int tird = ep->rep_remote_cma.responder_resources; @@ -733,7 +776,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, /* set trigger for requesting send completion */ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; - if (ep->rep_cqinit <= 2) + if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS) + ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS; + else if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; INIT_CQCOUNT(ep); ep->rep_ia = ia; @@ -866,8 +911,19 @@ retry: rpcrdma_ep_disconnect(ep, ia); rpcrdma_flush_cqs(ep); - if (ia->ri_memreg_strategy == RPCRDMA_FRMR) + switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: rpcrdma_reset_frmrs(ia); + break; + case RPCRDMA_MTHCAFMR: + rpcrdma_reset_fmrs(ia); + break; + case RPCRDMA_ALLPHYSICAL: + break; + default: + rc = -EIO; + goto out; + } xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); id = rpcrdma_create_id(xprt, ia, @@ -1287,6 +1343,34 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) kfree(buf->rb_pool); } +/* After a disconnect, unmap all FMRs. + * + * This is invoked only in the transport connect worker in order + * to serialize with rpcrdma_register_fmr_external(). + */ +static void +rpcrdma_reset_fmrs(struct rpcrdma_ia *ia) +{ + struct rpcrdma_xprt *r_xprt = + container_of(ia, struct rpcrdma_xprt, rx_ia); + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct list_head *pos; + struct rpcrdma_mw *r; + LIST_HEAD(l); + int rc; + + list_for_each(pos, &buf->rb_all) { + r = list_entry(pos, struct rpcrdma_mw, mw_all); + + INIT_LIST_HEAD(&l); + list_add(&r->r.fmr->list, &l); + rc = ib_unmap_fmr(&l); + if (rc) + dprintk("RPC: %s: ib_unmap_fmr failed %i\n", + __func__, rc); + } +} + /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in * an unusable state. Find FRMRs in this state and dereg / reg * each. FRMRs that are VALID and attached to an rpcrdma_req are @@ -1918,10 +2002,10 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, break; default: - return -1; + return -EIO; } if (rc) - return -1; + return rc; return nsegs; } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index ac7fc9a3134..b799041b75b 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -97,6 +97,12 @@ struct rpcrdma_ep { struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE]; }; +/* + * Force a signaled SEND Work Request every so often, + * in case the provider needs to do some housekeeping. + */ +#define RPCRDMA_MAX_UNSIGNALED_SENDS (32) + #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3b305ab17af..87ce7e8bb8d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -75,7 +75,7 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; * someone else's file names! */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; @@ -186,7 +186,7 @@ static struct ctl_table sunrpc_table[] = { */ #define XS_IDLE_DISC_TO (5U * 60 * HZ) -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # undef RPC_DEBUG_DATA # define RPCDBG_FACILITY RPCDBG_TRANS #endif @@ -216,65 +216,6 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) } #endif -struct sock_xprt { - struct rpc_xprt xprt; - - /* - * Network layer - */ - struct socket * sock; - struct sock * inet; - - /* - * State of TCP reply receive - */ - __be32 tcp_fraghdr, - tcp_xid, - tcp_calldir; - - u32 tcp_offset, - tcp_reclen; - - unsigned long tcp_copied, - tcp_flags; - - /* - * Connection of transports - */ - struct delayed_work connect_worker; - struct sockaddr_storage srcaddr; - unsigned short srcport; - - /* - * UDP socket buffer size parameters - */ - size_t rcvsize, - sndsize; - - /* - * Saved socket callback addresses - */ - void (*old_data_ready)(struct sock *); - void (*old_state_change)(struct sock *); - void (*old_write_space)(struct sock *); - void (*old_error_report)(struct sock *); -}; - -/* - * TCP receive state flags - */ -#define TCP_RCV_LAST_FRAG (1UL << 0) -#define TCP_RCV_COPY_FRAGHDR (1UL << 1) -#define TCP_RCV_COPY_XID (1UL << 2) -#define TCP_RCV_COPY_DATA (1UL << 3) -#define TCP_RCV_READ_CALLDIR (1UL << 4) -#define TCP_RCV_COPY_CALLDIR (1UL << 5) - -/* - * TCP RPC flags - */ -#define TCP_RPC_REPLY (1UL << 6) - static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) { return (struct rpc_xprt *) sk->sk_user_data; @@ -1415,6 +1356,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns dprintk("RPC: xs_tcp_data_recv started\n"); do { + trace_xs_tcp_data_recv(transport); /* Read in a new fragment marker if necessary */ /* Can we ever really expect to get completely empty fragments? */ if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) { @@ -1439,6 +1381,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns /* Skip over any trailing bytes on short reads */ xs_tcp_read_discard(transport, &desc); } while (desc.count); + trace_xs_tcp_data_recv(transport); dprintk("RPC: xs_tcp_data_recv done\n"); return len - desc.count; } @@ -1454,12 +1397,15 @@ static void xs_tcp_data_ready(struct sock *sk) struct rpc_xprt *xprt; read_descriptor_t rd_desc; int read; + unsigned long total = 0; dprintk("RPC: xs_tcp_data_ready...\n"); read_lock_bh(&sk->sk_callback_lock); - if (!(xprt = xprt_from_sock(sk))) + if (!(xprt = xprt_from_sock(sk))) { + read = 0; goto out; + } /* Any data means we had a useful conversation, so * the we don't need to delay the next reconnect */ @@ -1471,8 +1417,11 @@ static void xs_tcp_data_ready(struct sock *sk) do { rd_desc.count = 65536; read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + if (read > 0) + total += read; } while (read > 0); out: + trace_xs_tcp_data_ready(xprt, read, total); read_unlock_bh(&sk->sk_callback_lock); } @@ -3042,7 +2991,7 @@ static struct xprt_class xs_bc_tcp_transport = { */ int init_socket_xprt(void) { -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (!sunrpc_table_header) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif @@ -3061,7 +3010,7 @@ int init_socket_xprt(void) */ void cleanup_socket_xprt(void) { -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (sunrpc_table_header) { unregister_sysctl_table(sunrpc_table_header); sunrpc_table_header = NULL; |