From e50a7a1a42335243c94eeea4a8d23413cb02370d Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 16:12:46 +0400 Subject: NFS: make NFS client allocated per network namespace context This patch adds new net variable to nfs_client structure. This variable is set on NFS client creation and cheched during matching NFS client search. Initially current->nsproxy->net_ns is used as network namespace owner for new NFS client to create. This network namespace pointer is set during mount options parsing and thus can be passed from user-spave utils in future if will be necessary. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8102db9b926..02fb2001a28 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -123,6 +123,7 @@ struct nfs_parsed_mount_data { } nfs_server; struct security_mnt_opts lsm_opts; + struct net *net; }; /* mount_clnt.c */ -- cgit v1.2.3-70-g09d2 From 6d59b8d599d594bc314026c6856424fe49df5513 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 16:12:54 +0400 Subject: NFS: pass NFS client owner network namespace to RPC client creation routine This patch replaces static "init_net" with nfs_client->net pointer in RPC client creation calls. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 +- fs/nfs/internal.h | 1 + fs/nfs/mount_clnt.c | 4 ++-- fs/nfs/super.c | 1 + 4 files changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ca016fe4460..64815b72540 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -647,7 +647,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, { struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { - .net = &init_net, + .net = clp->net, .protocol = clp->cl_proto, .address = (struct sockaddr *)&clp->cl_addr, .addrsize = clp->cl_addrlen, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 02fb2001a28..d602188f889 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -138,6 +138,7 @@ struct nfs_mount_request { int noresvport; unsigned int *auth_flav_len; rpc_authflavor_t *auth_flavs; + struct net *net; }; extern int nfs_mount(struct nfs_mount_request *info); diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index d4c2d6b7507..4fbe3a8e5e6 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -153,7 +153,7 @@ int nfs_mount(struct nfs_mount_request *info) .rpc_resp = &result, }; struct rpc_create_args args = { - .net = &init_net, + .net = info->net, .protocol = info->protocol, .address = info->sap, .addrsize = info->salen, @@ -225,7 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info) .to_retries = 2, }; struct rpc_create_args args = { - .net = &init_net, + .net = info->net, .protocol = IPPROTO_UDP, .address = info->sap, .addrsize = info->salen, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 73aa75649bf..e45feb0fee5 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1625,6 +1625,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, .noresvport = args->flags & NFS_MOUNT_NORESVPORT, .auth_flav_len = &server_authlist_len, .auth_flavs = server_authlist, + .net = args->net, }; int status; -- cgit v1.2.3-70-g09d2 From eee17325f1dfbe004f1475743bab9e3d050d00f5 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 10 Jan 2012 16:13:19 +0400 Subject: NFS: idmap PipeFS notifier introduced v2: 1) Added "nfs_idmap_init" and "nfs_idmap_quit" definitions for kernels built without CONFIG_NFS_V4 option set. This patch subscribes NFS clients to RPC pipefs notifications. Idmap notifier is registering on NFS module load. This notifier callback is responsible for creation/destruction of PipeFS idmap pipe dentry for NFS4 clients. Since ipdmap pipe is created in rpc client pipefs directory, we have make sure, that this directory has been created already. IOW RPC client notifier callback has been called already. To achive this, PipeFS notifier priorities has been introduced (RPC clients notifier priority is greater than NFS idmap one). But this approach gives another problem: unlink for RPC client directory will be called before NFS idmap pipe unlink on UMOUNT event and will fail, because directory is not empty. The solution, introduced in this patch, is to try to remove client directory once again after idmap pipe was unlinked. This looks like ugly hack, so probably it should be replaced in some more elegant way. Note that no locking required in notifier callback because PipeFS superblock pointer is passed as an argument from it's creation or destruction routine and thus we can be sure about it's validity. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 4 +- fs/nfs/idmap.c | 75 ++++++++++++++++++++++++++++++++++++++ fs/nfs/internal.h | 4 ++ include/linux/nfs_idmap.h | 21 ++++++----- include/linux/sunrpc/rpc_pipe_fs.h | 7 ++++ net/sunrpc/clnt.c | 1 + net/sunrpc/rpc_pipe.c | 16 ++++++++ 7 files changed, 116 insertions(+), 12 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 64815b72540..ca9a4aa38df 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -52,8 +52,8 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT -static DEFINE_SPINLOCK(nfs_client_lock); -static LIST_HEAD(nfs_client_list); +DEFINE_SPINLOCK(nfs_client_lock); +LIST_HEAD(nfs_client_list); static LIST_HEAD(nfs_volume_list); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); #ifdef CONFIG_NFS_V4 diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 769274ed51c..ff084d258c4 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -377,6 +377,7 @@ int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, #include #include "nfs4_fs.h" +#include "internal.h" #define IDMAP_HASH_SZ 128 @@ -530,6 +531,80 @@ nfs_idmap_delete(struct nfs_client *clp) kfree(idmap); } +static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event, + struct super_block *sb) +{ + int err = 0; + + switch (event) { + case RPC_PIPEFS_MOUNT: + BUG_ON(clp->cl_rpcclient->cl_dentry == NULL); + err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, + clp->cl_idmap, + clp->cl_idmap->idmap_pipe); + break; + case RPC_PIPEFS_UMOUNT: + if (clp->cl_idmap->idmap_pipe) { + struct dentry *parent; + + parent = clp->cl_idmap->idmap_pipe->dentry->d_parent; + __nfs_idmap_unregister(clp->cl_idmap->idmap_pipe); + /* + * Note: This is a dirty hack. SUNRPC hook has been + * called already but simple_rmdir() call for the + * directory returned with error because of idmap pipe + * inside. Thus now we have to remove this directory + * here. + */ + if (rpc_rmdir(parent)) + printk(KERN_ERR "%s: failed to remove clnt dir!\n", __func__); + } + break; + default: + printk(KERN_ERR "%s: unknown event: %ld\n", __func__, event); + return -ENOTSUPP; + } + return err; +} + +static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct super_block *sb = ptr; + struct nfs_client *clp; + int error = 0; + + spin_lock(&nfs_client_lock); + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + if (clp->net != sb->s_fs_info) + continue; + if (clp->rpc_ops != &nfs_v4_clientops) + continue; + error = __rpc_pipefs_event(clp, event, sb); + if (error) + break; + } + spin_unlock(&nfs_client_lock); + return error; +} + +#define PIPEFS_NFS_PRIO 1 + +static struct notifier_block nfs_idmap_block = { + .notifier_call = rpc_pipefs_event, + .priority = SUNRPC_PIPEFS_NFS_PRIO, +}; + +int nfs_idmap_init(void) +{ + return rpc_pipefs_notifier_register(&nfs_idmap_block); +} + +void nfs_idmap_quit(void) +{ + rpc_pipefs_notifier_unregister(&nfs_idmap_block); +} + /* * Helper routines for manipulating the hashtable */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d602188f889..2b9836fe443 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -182,6 +182,10 @@ static inline void nfs_fs_proc_exit(void) { } #endif +#ifdef CONFIG_NFS_V4 +extern spinlock_t nfs_client_lock; +extern struct list_head nfs_client_list; +#endif /* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 308c1887701..3c9eeb7da64 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -69,31 +69,32 @@ struct nfs_server; struct nfs_fattr; struct nfs4_string; -#ifdef CONFIG_NFS_USE_NEW_IDMAPPER - +#ifdef CONFIG_NFS_V4 int nfs_idmap_init(void); void nfs_idmap_quit(void); - -static inline int nfs_idmap_new(struct nfs_client *clp) +#else +static inline int nfs_idmap_init(void) { return 0; } -static inline void nfs_idmap_delete(struct nfs_client *clp) -{ -} +static inline void nfs_idmap_quit(void) +{} +#endif -#else /* CONFIG_NFS_USE_NEW_IDMAPPER not set */ +#ifdef CONFIG_NFS_USE_NEW_IDMAPPER -static inline int nfs_idmap_init(void) +static inline int nfs_idmap_new(struct nfs_client *clp) { return 0; } -static inline void nfs_idmap_quit(void) +static inline void nfs_idmap_delete(struct nfs_client *clp) { } +#else /* CONFIG_NFS_USE_NEW_IDMAPPER not set */ + int nfs_idmap_new(struct nfs_client *); void nfs_idmap_delete(struct nfs_client *); diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 0d1f748f76d..ca32ebd14c1 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -49,6 +49,11 @@ RPC_I(struct inode *inode) return container_of(inode, struct rpc_inode, vfs_inode); } +enum { + SUNRPC_PIPEFS_NFS_PRIO, + SUNRPC_PIPEFS_RPC_PRIO, +}; + extern int rpc_pipefs_notifier_register(struct notifier_block *); extern void rpc_pipefs_notifier_unregister(struct notifier_block *); @@ -78,6 +83,8 @@ extern struct dentry *rpc_create_cache_dir(struct dentry *, struct cache_detail *); extern void rpc_remove_cache_dir(struct dentry *); +extern int rpc_rmdir(struct dentry *dentry); + struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags); void rpc_destroy_pipe_data(struct rpc_pipe *pipe); extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ed7c22de931..4c684801716 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -222,6 +222,7 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, static struct notifier_block rpc_clients_block = { .notifier_call = rpc_pipefs_event, + .priority = SUNRPC_PIPEFS_RPC_PRIO, }; int rpc_clients_notifier_register(void) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 6b417fcabdb..bae4e71d866 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -616,6 +616,22 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) return ret; } +int rpc_rmdir(struct dentry *dentry) +{ + struct dentry *parent; + struct inode *dir; + int error; + + parent = dget_parent(dentry); + dir = parent->d_inode; + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + error = __rpc_rmdir(dir, dentry); + mutex_unlock(&dir->i_mutex); + dput(parent); + return error; +} +EXPORT_SYMBOL_GPL(rpc_rmdir); + static int __rpc_unlink(struct inode *dir, struct dentry *dentry) { int ret; -- cgit v1.2.3-70-g09d2 From babea479b75a9ea3d84ace6d880513e18397a8bb Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 20 Jan 2012 17:19:56 +0400 Subject: NFS: remove unused nfs4_find_client_no_ident function Looks like this function survived after some cleanup patch without a reason. Now it's not called or referenced and I believe, that it can be simply removed. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 27 --------------------------- fs/nfs/internal.h | 1 - 2 files changed, 28 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index df60d9971b9..34c8d1cbf06 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1196,33 +1196,6 @@ error: } #ifdef CONFIG_NFS_V4 -/* - * NFSv4.0 callback thread helper - * - * Find a client by IP address, protocol version, and minorversion - * - * Called from the pg_authenticate method. The callback identifier - * is not used as it has not been decoded. - * - * Returns NULL if no such client - */ -struct nfs_client * -nfs4_find_client_no_ident(const struct sockaddr *addr) -{ - struct nfs_client *clp; - - spin_lock(&nfs_client_lock); - list_for_each_entry(clp, &nfs_client_list, cl_share_link) { - if (nfs4_cb_match_client(addr, clp, 0) == false) - continue; - atomic_inc(&clp->cl_count); - spin_unlock(&nfs_client_lock); - return clp; - } - spin_unlock(&nfs_client_lock); - return NULL; -} - /* * NFSv4.0 callback thread helper * diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2b9836fe443..eda4cde40fb 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -149,7 +149,6 @@ extern struct rpc_program nfs_program; extern void nfs_cleanup_cb_ident_idr(void); extern void nfs_put_client(struct nfs_client *); -extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); extern struct nfs_client *nfs4_find_client_ident(int); extern struct nfs_client * nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); -- cgit v1.2.3-70-g09d2 From a613fa168afc19179a7547fbba45644c5b6912bf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Jan 2012 13:53:56 -0500 Subject: SUNRPC: constify the rpc_program Signed-off-by: Trond Myklebust --- fs/lockd/clnt4xdr.c | 2 +- fs/lockd/clntxdr.c | 8 ++++---- fs/lockd/mon.c | 8 ++++---- fs/nfs/client.c | 8 ++++---- fs/nfs/internal.h | 2 +- fs/nfs/mount_clnt.c | 10 +++++----- fs/nfs/nfs2xdr.c | 2 +- fs/nfs/nfs3xdr.c | 4 ++-- fs/nfs/nfs4xdr.c | 2 +- fs/nfsd/nfs4callback.c | 6 +++--- include/linux/lockd/lockd.h | 2 +- include/linux/lockd/xdr4.h | 2 +- include/linux/nfs_xdr.h | 10 +++++----- include/linux/sunrpc/clnt.h | 8 ++++---- include/linux/sunrpc/stats.h | 6 +++--- net/sunrpc/clnt.c | 8 ++++---- net/sunrpc/rpcb_clnt.c | 20 ++++++++++---------- 17 files changed, 54 insertions(+), 54 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index f848b52c67b..3ddcbb1c0a4 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -598,7 +598,7 @@ static struct rpc_procinfo nlm4_procedures[] = { PROC(GRANTED_RES, res, norep), }; -struct rpc_version nlm_version4 = { +const struct rpc_version nlm_version4 = { .number = 4, .nrprocs = ARRAY_SIZE(nlm4_procedures), .procs = nlm4_procedures, diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index 180ac34feb9..3d35e3e80c1 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -596,19 +596,19 @@ static struct rpc_procinfo nlm_procedures[] = { PROC(GRANTED_RES, res, norep), }; -static struct rpc_version nlm_version1 = { +static const struct rpc_version nlm_version1 = { .number = 1, .nrprocs = ARRAY_SIZE(nlm_procedures), .procs = nlm_procedures, }; -static struct rpc_version nlm_version3 = { +static const struct rpc_version nlm_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(nlm_procedures), .procs = nlm_procedures, }; -static struct rpc_version *nlm_versions[] = { +static const struct rpc_version *nlm_versions[] = { [1] = &nlm_version1, [3] = &nlm_version3, #ifdef CONFIG_LOCKD_V4 @@ -618,7 +618,7 @@ static struct rpc_version *nlm_versions[] = { static struct rpc_stat nlm_rpc_stats; -struct rpc_program nlm_program = { +const struct rpc_program nlm_program = { .name = "lockd", .number = NLM_PROGRAM, .nrvers = ARRAY_SIZE(nlm_versions), diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 65ba36b80a9..c196030e530 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -47,7 +47,7 @@ struct nsm_res { u32 state; }; -static struct rpc_program nsm_program; +static const struct rpc_program nsm_program; static LIST_HEAD(nsm_handles); static DEFINE_SPINLOCK(nsm_lock); @@ -534,19 +534,19 @@ static struct rpc_procinfo nsm_procedures[] = { }, }; -static struct rpc_version nsm_version1 = { +static const struct rpc_version nsm_version1 = { .number = 1, .nrprocs = ARRAY_SIZE(nsm_procedures), .procs = nsm_procedures }; -static struct rpc_version * nsm_version[] = { +static const struct rpc_version *nsm_version[] = { [1] = &nsm_version1, }; static struct rpc_stat nsm_stats; -static struct rpc_program nsm_program = { +static const struct rpc_program nsm_program = { .name = "statd", .number = NSM_PROGRAM, .nrvers = ARRAY_SIZE(nsm_version), diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 34c8d1cbf06..98af1cb28ee 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -89,7 +89,7 @@ static bool nfs4_disable_idmapping = true; /* * RPC cruft for NFS */ -static struct rpc_version *nfs_version[5] = { +static const struct rpc_version *nfs_version[5] = { [2] = &nfs_version2, #ifdef CONFIG_NFS_V3 [3] = &nfs_version3, @@ -99,7 +99,7 @@ static struct rpc_version *nfs_version[5] = { #endif }; -struct rpc_program nfs_program = { +const struct rpc_program nfs_program = { .name = "nfs", .number = NFS_PROGRAM, .nrvers = ARRAY_SIZE(nfs_version), @@ -115,11 +115,11 @@ struct rpc_stat nfs_rpcstat = { #ifdef CONFIG_NFS_V3_ACL static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static struct rpc_version * nfsacl_version[] = { +static const struct rpc_version *nfsacl_version[] = { [3] = &nfsacl_version3, }; -struct rpc_program nfsacl_program = { +const struct rpc_program nfsacl_program = { .name = "nfsacl", .number = NFS_ACL_PROGRAM, .nrvers = ARRAY_SIZE(nfsacl_version), diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index eda4cde40fb..cdb121d3c6f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -145,7 +145,7 @@ extern int nfs_mount(struct nfs_mount_request *info); extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ -extern struct rpc_program nfs_program; +extern const struct rpc_program nfs_program; extern void nfs_cleanup_cb_ident_idr(void); extern void nfs_put_client(struct nfs_client *); diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 4fbe3a8e5e6..b37ca34af90 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -67,7 +67,7 @@ enum { MOUNTPROC3_EXPORT = 5, }; -static struct rpc_program mnt_program; +static const struct rpc_program mnt_program; /* * Defined by OpenGroup XNFS Version 3W, chapter 8 @@ -488,19 +488,19 @@ static struct rpc_procinfo mnt3_procedures[] = { }; -static struct rpc_version mnt_version1 = { +static const struct rpc_version mnt_version1 = { .number = 1, .nrprocs = ARRAY_SIZE(mnt_procedures), .procs = mnt_procedures, }; -static struct rpc_version mnt_version3 = { +static const struct rpc_version mnt_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(mnt3_procedures), .procs = mnt3_procedures, }; -static struct rpc_version *mnt_version[] = { +static const struct rpc_version *mnt_version[] = { NULL, &mnt_version1, NULL, @@ -509,7 +509,7 @@ static struct rpc_version *mnt_version[] = { static struct rpc_stat mnt_stats; -static struct rpc_program mnt_program = { +static const struct rpc_program mnt_program = { .name = "mount", .number = NFS_MNT_PROGRAM, .nrvers = ARRAY_SIZE(mnt_version), diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 792cb13a430..1f56000fabb 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -1150,7 +1150,7 @@ struct rpc_procinfo nfs_procedures[] = { PROC(STATFS, fhandle, statfsres, 0), }; -struct rpc_version nfs_version2 = { +const struct rpc_version nfs_version2 = { .number = 2, .nrprocs = ARRAY_SIZE(nfs_procedures), .procs = nfs_procedures diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 183c6b123d0..a77cc9a3ce5 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2461,7 +2461,7 @@ struct rpc_procinfo nfs3_procedures[] = { PROC(COMMIT, commit, commit, 5), }; -struct rpc_version nfs_version3 = { +const struct rpc_version nfs_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(nfs3_procedures), .procs = nfs3_procedures @@ -2489,7 +2489,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { }, }; -struct rpc_version nfsacl_version3 = { +const struct rpc_version nfsacl_version3 = { .number = 3, .nrprocs = sizeof(nfs3_acl_procedures)/ sizeof(nfs3_acl_procedures[0]), diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 95e92e43840..4633d405a94 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7109,7 +7109,7 @@ struct rpc_procinfo nfs4_procedures[] = { #endif /* CONFIG_NFS_V4_1 */ }; -struct rpc_version nfs_version4 = { +const struct rpc_version nfs_version4 = { .number = 4, .nrprocs = ARRAY_SIZE(nfs4_procedures), .procs = nfs4_procedures diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 426ccb17165..0e262f32ac4 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -605,18 +605,18 @@ static struct rpc_version nfs_cb_version4 = { .procs = nfs4_cb_procedures }; -static struct rpc_version *nfs_cb_version[] = { +static const struct rpc_version *nfs_cb_version[] = { &nfs_cb_version4, }; -static struct rpc_program cb_program; +static const struct rpc_program cb_program; static struct rpc_stat cb_stats = { .program = &cb_program }; #define NFS4_CALLBACK 0x40000000 -static struct rpc_program cb_program = { +static const struct rpc_program cb_program = { .name = "nfs4_cb", .number = NFS4_CALLBACK, .nrvers = ARRAY_SIZE(nfs_cb_version), diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 88a114fce47..8949167a148 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -188,7 +188,7 @@ struct nlm_block { /* * Global variables */ -extern struct rpc_program nlm_program; +extern const struct rpc_program nlm_program; extern struct svc_procedure nlmsvc_procedures[]; #ifdef CONFIG_LOCKD_V4 extern struct svc_procedure nlmsvc_procedures4[]; diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 7353821341e..e58c88b52ce 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -42,6 +42,6 @@ int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_cancargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_unlockargs(struct rpc_rqst *, u32 *, struct nlm_args *); */ -extern struct rpc_version nlm_version4; +extern const struct rpc_version nlm_version4; #endif /* LOCKD_XDR4_H */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f5188e10ea0..144419a9cbd 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1274,11 +1274,11 @@ struct nfs_rpc_ops { extern const struct nfs_rpc_ops nfs_v2_clientops; extern const struct nfs_rpc_ops nfs_v3_clientops; extern const struct nfs_rpc_ops nfs_v4_clientops; -extern struct rpc_version nfs_version2; -extern struct rpc_version nfs_version3; -extern struct rpc_version nfs_version4; +extern const struct rpc_version nfs_version2; +extern const struct rpc_version nfs_version3; +extern const struct rpc_version nfs_version4; -extern struct rpc_version nfsacl_version3; -extern struct rpc_program nfsacl_program; +extern const struct rpc_version nfsacl_version3; +extern const struct rpc_program nfsacl_program; #endif diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 4a46ffd73a0..a4c62e95c72 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -61,7 +61,7 @@ struct rpc_clnt { struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; - struct rpc_program * cl_program; + const struct rpc_program *cl_program; char *cl_principal; /* target to authenticate to */ }; @@ -73,7 +73,7 @@ struct rpc_program { const char * name; /* protocol name */ u32 number; /* program number */ unsigned int nrvers; /* number of versions */ - struct rpc_version ** version; /* version array */ + const struct rpc_version ** version; /* version array */ struct rpc_stat * stats; /* statistics */ const char * pipe_dir_name; /* path to rpc_pipefs dir */ }; @@ -109,7 +109,7 @@ struct rpc_create_args { struct sockaddr *saddress; const struct rpc_timeout *timeout; const char *servername; - struct rpc_program *program; + const struct rpc_program *program; u32 prognumber; /* overrides program->number */ u32 version; rpc_authflavor_t authflavor; @@ -128,7 +128,7 @@ struct rpc_create_args { struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, - struct rpc_program *, u32); + const struct rpc_program *, u32); void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index 76f3f7cc6e3..edc64219f92 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -12,7 +12,7 @@ #include struct rpc_stat { - struct rpc_program * program; + const struct rpc_program *program; unsigned int netcnt, netudpcnt, @@ -60,7 +60,7 @@ void rpc_modcount(struct inode *, int); #ifdef CONFIG_PROC_FS struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *); void rpc_proc_unregister(struct net *,const char *); -void rpc_proc_zero(struct rpc_program *); +void rpc_proc_zero(const struct rpc_program *); struct proc_dir_entry * svc_proc_register(struct net *, struct svc_stat *, const struct file_operations *); void svc_proc_unregister(struct net *, const char *); @@ -71,7 +71,7 @@ void svc_seq_show(struct seq_file *, static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; } static inline void rpc_proc_unregister(struct net *net, const char *p) {} -static inline void rpc_proc_zero(struct rpc_program *p) {} +static inline void rpc_proc_zero(const struct rpc_program *p) {} static inline struct proc_dir_entry *svc_proc_register(struct net *net, struct svc_stat *s, const struct file_operations *f) { return NULL; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 1b2317fa404..db7220d8773 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -238,8 +238,8 @@ void rpc_clients_notifier_unregister(void) static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) { - struct rpc_program *program = args->program; - struct rpc_version *version; + const struct rpc_program *program = args->program; + const struct rpc_version *version; struct rpc_clnt *clnt = NULL; struct rpc_auth *auth; int err; @@ -626,11 +626,11 @@ rpc_release_client(struct rpc_clnt *clnt) * The Sun NFSv2/v3 ACL protocol can do this. */ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, - struct rpc_program *program, + const struct rpc_program *program, u32 vers) { struct rpc_clnt *clnt; - struct rpc_version *version; + const struct rpc_version *version; int err; BUG_ON(vers >= program->nrvers || !program->version[vers]); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index d3978017b25..b1f08bd6788 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -112,7 +112,7 @@ enum { static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); -static struct rpc_program rpcb_program; +static const struct rpc_program rpcb_program; struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -137,8 +137,8 @@ struct rpcb_info { struct rpc_procinfo * rpc_proc; }; -static struct rpcb_info rpcb_next_version[]; -static struct rpcb_info rpcb_next_version6[]; +static const struct rpcb_info rpcb_next_version[]; +static const struct rpcb_info rpcb_next_version6[]; static const struct rpc_call_ops rpcb_getport_ops = { .rpc_call_done = rpcb_getport_done, @@ -1051,7 +1051,7 @@ static struct rpc_procinfo rpcb_procedures4[] = { }, }; -static struct rpcb_info rpcb_next_version[] = { +static const struct rpcb_info rpcb_next_version[] = { { .rpc_vers = RPCBVERS_2, .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], @@ -1061,7 +1061,7 @@ static struct rpcb_info rpcb_next_version[] = { }, }; -static struct rpcb_info rpcb_next_version6[] = { +static const struct rpcb_info rpcb_next_version6[] = { { .rpc_vers = RPCBVERS_4, .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], @@ -1075,25 +1075,25 @@ static struct rpcb_info rpcb_next_version6[] = { }, }; -static struct rpc_version rpcb_version2 = { +static const struct rpc_version rpcb_version2 = { .number = RPCBVERS_2, .nrprocs = ARRAY_SIZE(rpcb_procedures2), .procs = rpcb_procedures2 }; -static struct rpc_version rpcb_version3 = { +static const struct rpc_version rpcb_version3 = { .number = RPCBVERS_3, .nrprocs = ARRAY_SIZE(rpcb_procedures3), .procs = rpcb_procedures3 }; -static struct rpc_version rpcb_version4 = { +static const struct rpc_version rpcb_version4 = { .number = RPCBVERS_4, .nrprocs = ARRAY_SIZE(rpcb_procedures4), .procs = rpcb_procedures4 }; -static struct rpc_version *rpcb_version[] = { +static const struct rpc_version *rpcb_version[] = { NULL, NULL, &rpcb_version2, @@ -1103,7 +1103,7 @@ static struct rpc_version *rpcb_version[] = { static struct rpc_stat rpcb_stats; -static struct rpc_program rpcb_program = { +static const struct rpc_program rpcb_program = { .name = "rpcbind", .number = RPCBIND_PROGRAM, .nrvers = ARRAY_SIZE(rpcb_version), -- cgit v1.2.3-70-g09d2 From 6b13168b36b6a7f603d962c232f1f2f325705832 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 23 Jan 2012 17:26:05 +0000 Subject: NFS: make nfs_client_list per net ns This patch splits global list of NFS clients into per-net-ns array of lists. This looks more strict and clearer. BTW, this patch also makes "/proc/fs/nfsfs/servers" entry content depends on /proc mount owner pid namespace. See below for details. NOTE: few words about how was /proc/fs/nfsfs/ entries content show per network namespace done. This is a little bit tricky and not the best is could be. But it's cheap (proper fix for /proc conteinerization is a hard nut to crack). The idea is simple: take proper network namespace from pid namespace child reaper nsproxy of /proc/ mount creator. This actually means, that if there are 2 containers with different net namespace sharing pid namespace, then read of /proc/fs/nfsfs/ entries will always return content, taken from net namespace of pid namespace creator task (and thus second namespace set wil be unvisible). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 38 +++++++++++++++++++++++++++----------- fs/nfs/idmap.c | 5 ++--- fs/nfs/inode.c | 1 + fs/nfs/internal.h | 2 +- fs/nfs/netns.h | 1 + 5 files changed, 32 insertions(+), 15 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 98af1cb28ee..058eb9bcfa9 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include @@ -49,11 +51,11 @@ #include "internal.h" #include "fscache.h" #include "pnfs.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_CLIENT DEFINE_SPINLOCK(nfs_client_lock); -LIST_HEAD(nfs_client_list); static LIST_HEAD(nfs_volume_list); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); #ifdef CONFIG_NFS_V4 @@ -464,8 +466,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat { struct nfs_client *clp; const struct sockaddr *sap = data->addr; + struct nfs_net *nn = net_generic(data->net, nfs_net_id); - list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; /* Don't match clients that failed to initialise properly */ if (clp->cl_cons_state < 0) @@ -483,9 +486,6 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat /* Match the full socket address */ if (!nfs_sockaddr_cmp(sap, clap)) continue; - /* Match network namespace */ - if (clp->net != data->net) - continue; atomic_inc(&clp->cl_count); return clp; @@ -506,6 +506,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, { struct nfs_client *clp, *new = NULL; int error; + struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); dprintk("--> nfs_get_client(%s,v%u)\n", cl_init->hostname ?: "", cl_init->rpc_ops->version); @@ -531,7 +532,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, /* install a new client and return with it unready */ install_client: clp = new; - list_add(&clp->cl_share_link, &nfs_client_list); + list_add(&clp->cl_share_link, &nn->nfs_client_list); spin_unlock(&nfs_client_lock); error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr, @@ -1227,9 +1228,10 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, struct nfs4_sessionid *sid) { struct nfs_client *clp; + struct nfs_net *nn = net_generic(&init_net, nfs_net_id); spin_lock(&nfs_client_lock); - list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { if (nfs4_cb_match_client(addr, clp, 1) == false) continue; @@ -1757,6 +1759,13 @@ out_free_server: return ERR_PTR(error); } +void nfs_clients_init(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + + INIT_LIST_HEAD(&nn->nfs_client_list); +} + #ifdef CONFIG_PROC_FS static struct proc_dir_entry *proc_fs_nfs; @@ -1810,13 +1819,15 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) { struct seq_file *m; int ret; + struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info; + struct net *net = pid_ns->child_reaper->nsproxy->net_ns; ret = seq_open(file, &nfs_server_list_ops); if (ret < 0) return ret; m = file->private_data; - m->private = PDE(inode)->data; + m->private = net; return 0; } @@ -1826,9 +1837,11 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) */ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) { + struct nfs_net *nn = net_generic(m->private, nfs_net_id); + /* lock the list against modification */ spin_lock(&nfs_client_lock); - return seq_list_start_head(&nfs_client_list, *_pos); + return seq_list_start_head(&nn->nfs_client_list, *_pos); } /* @@ -1836,7 +1849,9 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) */ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) { - return seq_list_next(v, &nfs_client_list, pos); + struct nfs_net *nn = net_generic(p->private, nfs_net_id); + + return seq_list_next(v, &nn->nfs_client_list, pos); } /* @@ -1853,9 +1868,10 @@ static void nfs_server_list_stop(struct seq_file *p, void *v) static int nfs_server_list_show(struct seq_file *m, void *v) { struct nfs_client *clp; + struct nfs_net *nn = net_generic(m->private, nfs_net_id); /* display header on line 1 */ - if (v == &nfs_client_list) { + if (v == &nn->nfs_client_list) { seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); return 0; } diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 83f7d42d5c7..2f78f0ce266 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -532,13 +532,12 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct super_block *sb = ptr; + struct nfs_net *nn = net_generic(sb->s_fs_info, nfs_net_id); struct nfs_client *clp; int error = 0; spin_lock(&nfs_client_lock); - list_for_each_entry(clp, &nfs_client_list, cl_share_link) { - if (clp->net != sb->s_fs_info) - continue; + list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { if (clp->rpc_ops != &nfs_v4_clientops) continue; error = __rpc_pipefs_event(clp, event, sb); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 028464bcbe0..0365b84cc2c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1558,6 +1558,7 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { + nfs_clients_init(net); return nfs_dns_resolver_cache_init(net); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index cdb121d3c6f..a9ae8069fff 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -146,6 +146,7 @@ extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ extern const struct rpc_program nfs_program; +extern void nfs_clients_init(struct net *net); extern void nfs_cleanup_cb_ident_idr(void); extern void nfs_put_client(struct nfs_client *); @@ -183,7 +184,6 @@ static inline void nfs_fs_proc_exit(void) #endif #ifdef CONFIG_NFS_V4 extern spinlock_t nfs_client_lock; -extern struct list_head nfs_client_list; #endif /* nfs4namespace.c */ diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 39ae4cad5b4..feb33c3f9a5 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -7,6 +7,7 @@ struct nfs_net { struct cache_detail *nfs_dns_resolve; struct rpc_pipe *bl_device_pipe; + struct list_head nfs_client_list; }; extern int nfs_net_id; -- cgit v1.2.3-70-g09d2 From 28cd1b3f262dba56b5e335ba668e342d530f6129 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 23 Jan 2012 17:26:22 +0000 Subject: NFS: make cb_ident_idr per net ns This patch makes ID's infrastructure network namespace aware. This was done mainly because of nfs_client_lock, which is desired to be per network namespace, but protects NFS clients ID's. NOTE: NFS client's net pointer have to be set prior to ID initialization, proper assignment was moved. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 2 +- fs/nfs/client.c | 28 ++++++++++++++++++---------- fs/nfs/inode.c | 2 +- fs/nfs/internal.h | 4 ++-- fs/nfs/netns.h | 3 +++ 5 files changed, 25 insertions(+), 14 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 2f45aa71742..e14af46bd2c 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -876,7 +876,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r return rpc_garbage_args; if (hdr_arg.minorversion == 0) { - cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); + cps.clp = nfs4_find_client_ident(rqstp->rq_xprt->xpt_net, hdr_arg.cb_ident); if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) return rpc_drop_reply; } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d58e8386e6b..f51b2795ce0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -58,7 +58,6 @@ DEFINE_SPINLOCK(nfs_client_lock); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); #ifdef CONFIG_NFS_V4 -static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */ /* * Get a unique NFSv4.0 callback identifier which will be used @@ -67,14 +66,15 @@ static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) { int ret = 0; + struct nfs_net *nn = net_generic(clp->net, nfs_net_id); if (clp->rpc_ops->version != 4 || minorversion != 0) return ret; retry: - if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL)) + if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) return -ENOMEM; spin_lock(&nfs_client_lock); - ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident); + ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); spin_unlock(&nfs_client_lock); if (ret == -EAGAIN) goto retry; @@ -173,6 +173,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_proto = cl_init->proto; + clp->net = cl_init->net; #ifdef CONFIG_NFS_V4 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); @@ -191,7 +192,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ if (!IS_ERR(cred)) clp->cl_machine_cred = cred; nfs_fscache_get_client_cookie(clp); - clp->net = cl_init->net; return clp; @@ -236,16 +236,20 @@ static void nfs4_shutdown_client(struct nfs_client *clp) } /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ -void nfs_cleanup_cb_ident_idr(void) +void nfs_cleanup_cb_ident_idr(struct net *net) { - idr_destroy(&cb_ident_idr); + struct nfs_net *nn = net_generic(net, nfs_net_id); + + idr_destroy(&nn->cb_ident_idr); } /* nfs_client_lock held */ static void nfs_cb_idr_remove_locked(struct nfs_client *clp) { + struct nfs_net *nn = net_generic(clp->net, nfs_net_id); + if (clp->cl_cb_ident) - idr_remove(&cb_ident_idr, clp->cl_cb_ident); + idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident); } static void pnfs_init_server(struct nfs_server *server) @@ -263,7 +267,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) { } -void nfs_cleanup_cb_ident_idr(void) +void nfs_cleanup_cb_ident_idr(struct net *net) { } @@ -1203,12 +1207,13 @@ error: * Find a client by callback identifier */ struct nfs_client * -nfs4_find_client_ident(int cb_ident) +nfs4_find_client_ident(struct net *net, int cb_ident) { struct nfs_client *clp; + struct nfs_net *nn = net_generic(net, nfs_net_id); spin_lock(&nfs_client_lock); - clp = idr_find(&cb_ident_idr, cb_ident); + clp = idr_find(&nn->cb_ident_idr, cb_ident); if (clp) atomic_inc(&clp->cl_count); spin_unlock(&nfs_client_lock); @@ -1765,6 +1770,9 @@ void nfs_clients_init(struct net *net) INIT_LIST_HEAD(&nn->nfs_client_list); INIT_LIST_HEAD(&nn->nfs_volume_list); +#ifdef CONFIG_NFS_V4 + idr_init(&nn->cb_ident_idr); +#endif } #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0365b84cc2c..6c662598f88 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1565,6 +1565,7 @@ static int nfs_net_init(struct net *net) static void nfs_net_exit(struct net *net) { nfs_dns_resolver_cache_destroy(net); + nfs_cleanup_cb_ident_idr(net); } static struct pernet_operations nfs_net_ops = { @@ -1674,7 +1675,6 @@ static void __exit exit_nfs_fs(void) #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif - nfs_cleanup_cb_ident_idr(); unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a9ae8069fff..958fff2927c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -148,9 +148,9 @@ extern void nfs_umount(const struct nfs_mount_request *info); extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); -extern void nfs_cleanup_cb_ident_idr(void); +extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); -extern struct nfs_client *nfs4_find_client_ident(int); +extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); extern struct nfs_server *nfs_create_server( diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 0fbd4e017d2..547cc9525ba 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -9,6 +9,9 @@ struct nfs_net { struct rpc_pipe *bl_device_pipe; struct list_head nfs_client_list; struct list_head nfs_volume_list; +#ifdef CONFIG_NFS_V4 + struct idr cb_ident_idr; /* Protected by nfs_client_lock */ +#endif }; extern int nfs_net_id; -- cgit v1.2.3-70-g09d2 From dc03085834a4530b2514708a643cd3fe38f35b21 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 23 Jan 2012 17:26:31 +0000 Subject: NFS: make nfs_client_lock per net ns This patch makes nfs_clients_lock allocated per network namespace. All items it protects are already network namespace aware. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 51 +++++++++++++++++++++++++++++---------------------- fs/nfs/idmap.c | 4 ++-- fs/nfs/internal.h | 3 --- fs/nfs/netns.h | 1 + 4 files changed, 32 insertions(+), 27 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f51b2795ce0..9e11d298883 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -55,7 +55,6 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT -DEFINE_SPINLOCK(nfs_client_lock); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); #ifdef CONFIG_NFS_V4 @@ -73,9 +72,9 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) retry: if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) return -ENOMEM; - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); if (ret == -EAGAIN) goto retry; return ret; @@ -313,15 +312,18 @@ static void nfs_free_client(struct nfs_client *clp) */ void nfs_put_client(struct nfs_client *clp) { + struct nfs_net *nn; + if (!clp) return; dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); + nn = net_generic(clp->net, nfs_net_id); - if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { + if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { list_del(&clp->cl_share_link); nfs_cb_idr_remove_locked(clp); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); BUG_ON(!list_empty(&clp->cl_superblocks)); @@ -516,7 +518,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, /* see if the client already exists */ do { - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); clp = nfs_match_client(cl_init); if (clp) @@ -524,7 +526,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, if (new) goto install_client; - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); new = nfs_alloc_client(cl_init); } while (!IS_ERR(new)); @@ -536,7 +538,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, install_client: clp = new; list_add(&clp->cl_share_link, &nn->nfs_client_list); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr, authflavour, noresvport); @@ -551,7 +553,7 @@ install_client: * - make sure it's ready before returning */ found_client: - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); if (new) nfs_free_client(new); @@ -1041,24 +1043,25 @@ static void nfs_server_insert_lists(struct nfs_server *server) struct nfs_client *clp = server->nfs_client; struct nfs_net *nn = net_generic(clp->net, nfs_net_id); - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); list_add_tail(&server->master_link, &nn->nfs_volume_list); clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); } static void nfs_server_remove_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; + struct nfs_net *nn = net_generic(clp->net, nfs_net_id); - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); list_del_rcu(&server->client_link); if (clp && list_empty(&clp->cl_superblocks)) set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); list_del(&server->master_link); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); synchronize_rcu(); } @@ -1212,11 +1215,11 @@ nfs4_find_client_ident(struct net *net, int cb_ident) struct nfs_client *clp; struct nfs_net *nn = net_generic(net, nfs_net_id); - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); clp = idr_find(&nn->cb_ident_idr, cb_ident); if (clp) atomic_inc(&clp->cl_count); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); return clp; } @@ -1235,7 +1238,7 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, struct nfs_client *clp; struct nfs_net *nn = net_generic(&init_net, nfs_net_id); - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { if (nfs4_cb_match_client(addr, clp, 1) == false) continue; @@ -1249,10 +1252,10 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, continue; atomic_inc(&clp->cl_count); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); return clp; } - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); return NULL; } @@ -1849,7 +1852,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) struct nfs_net *nn = net_generic(m->private, nfs_net_id); /* lock the list against modification */ - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); return seq_list_start_head(&nn->nfs_client_list, *_pos); } @@ -1868,7 +1871,9 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) */ static void nfs_server_list_stop(struct seq_file *p, void *v) { - spin_unlock(&nfs_client_lock); + struct nfs_net *nn = net_generic(p->private, nfs_net_id); + + spin_unlock(&nn->nfs_client_lock); } /* @@ -1930,7 +1935,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) struct nfs_net *nn = net_generic(m->private, nfs_net_id); /* lock the list against modification */ - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); return seq_list_start_head(&nn->nfs_volume_list, *_pos); } @@ -1949,7 +1954,9 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) */ static void nfs_volume_list_stop(struct seq_file *p, void *v) { - spin_unlock(&nfs_client_lock); + struct nfs_net *nn = net_generic(p->private, nfs_net_id); + + spin_unlock(&nn->nfs_client_lock); } /* diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 2f78f0ce266..d2afcd8354e 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -536,7 +536,7 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, struct nfs_client *clp; int error = 0; - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { if (clp->rpc_ops != &nfs_v4_clientops) continue; @@ -544,7 +544,7 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, if (error) break; } - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); return error; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 958fff2927c..b38b73347af 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -182,9 +182,6 @@ static inline void nfs_fs_proc_exit(void) { } #endif -#ifdef CONFIG_NFS_V4 -extern spinlock_t nfs_client_lock; -#endif /* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 547cc9525ba..7baad89ae60 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -12,6 +12,7 @@ struct nfs_net { #ifdef CONFIG_NFS_V4 struct idr cb_ident_idr; /* Protected by nfs_client_lock */ #endif + spinlock_t nfs_client_lock; }; extern int nfs_net_id; -- cgit v1.2.3-70-g09d2 From c7add9a9720ff5be4715f7a0bb0d9578b2e8534e Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Thu, 26 Jan 2012 15:11:49 +0400 Subject: NFS: search for client session id in proper network namespace Network namespace is taken from request transport and passed as a part of cb_process_state structure. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 1 + fs/nfs/callback_proc.c | 2 +- fs/nfs/callback_xdr.c | 1 + fs/nfs/client.c | 4 ++-- fs/nfs/internal.h | 3 ++- 5 files changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index c89d3b9e483..197e0d3754c 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -39,6 +39,7 @@ struct cb_process_state { __be32 drc_status; struct nfs_client *clp; int slotid; + struct net *net; }; struct cb_compound_hdr_arg { diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 0e6e63f55db..f71978d107d 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -461,7 +461,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, int i; __be32 status = htonl(NFS4ERR_BADSESSION); - clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); + clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, &args->csa_sessionid); if (clp == NULL) goto out; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index e14af46bd2c..2e372240d02 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -861,6 +861,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r .drc_status = 0, .clp = NULL, .slotid = -1, + .net = rqstp->rq_xprt->xpt_net, }; unsigned int nops = 0; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 9e11d298883..2328dcbf6c0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1232,11 +1232,11 @@ nfs4_find_client_ident(struct net *net, int cb_ident) * Returns NULL if no such client */ struct nfs_client * -nfs4_find_client_sessionid(const struct sockaddr *addr, +nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, struct nfs4_sessionid *sid) { struct nfs_client *clp; - struct nfs_net *nn = net_generic(&init_net, nfs_net_id); + struct nfs_net *nn = net_generic(net, nfs_net_id); spin_lock(&nn->nfs_client_lock); list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b38b73347af..0c3648a947d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -152,7 +152,8 @@ extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * -nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); +nfs4_find_client_sessionid(struct net *, const struct sockaddr *, + struct nfs4_sessionid *); extern struct nfs_server *nfs_create_server( const struct nfs_parsed_mount_data *, struct nfs_fh *); -- cgit v1.2.3-70-g09d2 From d6d6dc7cdfda7c8f49a89a7b7261846f319da6d1 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 8 Mar 2012 17:29:35 -0500 Subject: NFS: remove nfs_inode radix tree The radix tree is only being used to compile lists of reqs needing commit. It is simpler to just put the reqs directly into a list. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- fs/nfs/internal.h | 2 + fs/nfs/nfs4filelayout.c | 109 +++++++++++++++++++++++++++++++++--------- fs/nfs/nfs4filelayout.h | 7 ++- fs/nfs/pagelist.c | 61 ------------------------ fs/nfs/pnfs.h | 82 ++++++++++++++++---------------- fs/nfs/write.c | 120 ++++++++++++++++++++++++++--------------------- include/linux/nfs_fs.h | 6 +-- include/linux/nfs_page.h | 13 +---- 9 files changed, 208 insertions(+), 194 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 70e25c9c567..1a19f8d30c1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1560,7 +1560,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); - INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); + INIT_LIST_HEAD(&nfsi->commit_list); nfsi->npages = 0; nfsi->ncommit = 0; atomic_set(&nfsi->silly_count, 1); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 0c3648a947d..04a914704e7 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -308,6 +308,8 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ +extern int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, + int max); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 768f6f86c9f..716fac6bc08 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -682,14 +682,16 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, int size = (fl->stripe_type == STRIPE_SPARSE) ? fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags); + fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags); if (!fl->commit_buckets) { filelayout_free_lseg(&fl->generic_hdr); return NULL; } fl->number_of_buckets = size; - for (i = 0; i < size; i++) - INIT_LIST_HEAD(&fl->commit_buckets[i]); + for (i = 0; i < size; i++) { + INIT_LIST_HEAD(&fl->commit_buckets[i].written); + INIT_LIST_HEAD(&fl->commit_buckets[i].committing); + } } return &fl->generic_hdr; } @@ -767,11 +769,6 @@ static const struct nfs_pageio_ops filelayout_pg_write_ops = { .pg_doio = pnfs_generic_pg_writepages, }; -static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) -{ - return !FILELAYOUT_LSEG(lseg)->commit_through_mds; -} - static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) { if (fl->stripe_type == STRIPE_SPARSE) @@ -780,13 +777,39 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) return j; } -struct list_head *filelayout_choose_commit_list(struct nfs_page *req) +/* The generic layer is about to remove the req from the commit list. + * If this will make the bucket empty, it will need to put the lseg reference. + * Note inode lock is held, so we can't do the put here. + */ +static struct pnfs_layout_segment * +filelayout_remove_commit_req(struct nfs_page *req) +{ + if (list_is_singular(&req->wb_list)) { + struct inode *inode = req->wb_context->dentry->d_inode; + struct pnfs_layout_segment *lseg; + + /* From here we can find the bucket, but for the moment, + * since there is only one relevant lseg... + */ + list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { + if (lseg->pls_range.iomode == IOMODE_RW) + return lseg; + } + } + return NULL; +} + +static struct list_head * +filelayout_choose_commit_list(struct nfs_page *req, + struct pnfs_layout_segment *lseg) { - struct pnfs_layout_segment *lseg = req->wb_commit_lseg; struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); u32 i, j; struct list_head *list; + if (fl->commit_through_mds) + return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; + /* Note that we are calling nfs4_fl_calc_j_index on each page * that ends up being committed to a data server. An attractive * alternative is to add a field to nfs_write_data and nfs_page @@ -796,9 +819,14 @@ struct list_head *filelayout_choose_commit_list(struct nfs_page *req) j = nfs4_fl_calc_j_index(lseg, (loff_t)req->wb_index << PAGE_CACHE_SHIFT); i = select_bucket_index(fl, j); - list = &fl->commit_buckets[i]; + list = &fl->commit_buckets[i].written; if (list_empty(list)) { - /* Non-empty buckets hold a reference on the lseg */ + /* Non-empty buckets hold a reference on the lseg. That ref + * is normally transferred to the COMMIT call and released + * there. It could also be released if the last req is pulled + * off due to a rewrite, in which case it will be done in + * filelayout_remove_commit_req + */ get_lseg(lseg); } return list; @@ -860,18 +888,56 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) /* * This is only useful while we are using whole file layouts. */ -static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) +static struct pnfs_layout_segment * +find_only_write_lseg_locked(struct inode *inode) { - struct pnfs_layout_segment *lseg, *rv = NULL; + struct pnfs_layout_segment *lseg; - spin_lock(&inode->i_lock); list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) if (lseg->pls_range.iomode == IOMODE_RW) - rv = get_lseg(lseg); + return get_lseg(lseg); + return NULL; +} + +static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) +{ + struct pnfs_layout_segment *rv; + + spin_lock(&inode->i_lock); + rv = find_only_write_lseg_locked(inode); spin_unlock(&inode->i_lock); return rv; } +/* Move reqs from written to committing lists, returning count of number moved. + * Note called with i_lock held. + */ +static int filelayout_scan_commit_lists(struct inode *inode, int max) +{ + struct pnfs_layout_segment *lseg; + struct nfs4_filelayout_segment *fl; + int i, rv = 0, cnt; + + lseg = find_only_write_lseg_locked(inode); + if (!lseg) + return 0; + fl = FILELAYOUT_LSEG(lseg); + if (fl->commit_through_mds) + goto out_put; + for (i = 0; i < fl->number_of_buckets; i++) { + if (list_empty(&fl->commit_buckets[i].written)) + continue; + cnt = nfs_scan_commit_list(&fl->commit_buckets[i].written, + &fl->commit_buckets[i].committing, + max); + max -= cnt; + rv += cnt; + } +out_put: + put_lseg(lseg); + return rv; +} + static int alloc_ds_commits(struct inode *inode, struct list_head *list) { struct pnfs_layout_segment *lseg; @@ -886,7 +952,7 @@ static int alloc_ds_commits(struct inode *inode, struct list_head *list) return 0; fl = FILELAYOUT_LSEG(lseg); for (i = 0; i < fl->number_of_buckets; i++) { - if (list_empty(&fl->commit_buckets[i])) + if (list_empty(&fl->commit_buckets[i].committing)) continue; data = nfs_commitdata_alloc(); if (!data) @@ -900,9 +966,9 @@ static int alloc_ds_commits(struct inode *inode, struct list_head *list) out_bad: for (j = i; j < fl->number_of_buckets; j++) { - if (list_empty(&fl->commit_buckets[i])) + if (list_empty(&fl->commit_buckets[i].committing)) continue; - nfs_retry_commit(&fl->commit_buckets[i], lseg); + nfs_retry_commit(&fl->commit_buckets[i].committing, lseg); put_lseg(lseg); /* associated with emptying bucket */ } put_lseg(lseg); @@ -937,7 +1003,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); } else { - nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg); + nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg); filelayout_initiate_commit(data, how); } } @@ -967,8 +1033,9 @@ static struct pnfs_layoutdriver_type filelayout_type = { .free_lseg = filelayout_free_lseg, .pg_read_ops = &filelayout_pg_read_ops, .pg_write_ops = &filelayout_pg_write_ops, - .mark_pnfs_commit = filelayout_mark_pnfs_commit, .choose_commit_list = filelayout_choose_commit_list, + .remove_commit_req = filelayout_remove_commit_req, + .scan_commit_lists = filelayout_scan_commit_lists, .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, .write_pagelist = filelayout_write_pagelist, diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 2e42284253f..21190bb1f5e 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -74,6 +74,11 @@ struct nfs4_file_layout_dsaddr { struct nfs4_pnfs_ds *ds_list[1]; }; +struct nfs4_fl_commit_bucket { + struct list_head written; + struct list_head committing; +}; + struct nfs4_filelayout_segment { struct pnfs_layout_segment generic_hdr; u32 stripe_type; @@ -84,7 +89,7 @@ struct nfs4_filelayout_segment { struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ unsigned int num_fh; struct nfs_fh **fh_array; - struct list_head *commit_buckets; /* Sort commits to ds */ + struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */ int number_of_buckets; }; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index fc5b54b84f8..d21fceaa9f6 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -396,67 +396,6 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) } } -#define NFS_SCAN_MAXENTRIES 16 -/** - * nfs_scan_list - Scan a list for matching requests - * @nfsi: NFS inode - * @dst: Destination list - * @idx_start: lower bound of page->index to scan - * @npages: idx_start + npages sets the upper bound to scan. - * @tag: tag to scan for - * - * Moves elements from one of the inode request lists. - * If the number of requests is set to 0, the entire address_space - * starting at index idx_start, is scanned. - * The requests are *not* checked to ensure that they form a contiguous set. - * You must be holding the inode's i_lock when calling this function - */ -int nfs_scan_list(struct nfs_inode *nfsi, - struct list_head *dst, pgoff_t idx_start, - unsigned int npages, int tag) -{ - struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; - struct nfs_page *req; - pgoff_t idx_end; - int found, i; - int res; - struct list_head *list; - - res = 0; - if (npages == 0) - idx_end = ~0; - else - idx_end = idx_start + npages - 1; - - for (;;) { - found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, - (void **)&pgvec[0], idx_start, - NFS_SCAN_MAXENTRIES, tag); - if (found <= 0) - break; - for (i = 0; i < found; i++) { - req = pgvec[i]; - if (req->wb_index > idx_end) - goto out; - idx_start = req->wb_index + 1; - if (nfs_lock_request_dontget(req)) { - kref_get(&req->wb_kref); - radix_tree_tag_clear(&nfsi->nfs_page_tree, - req->wb_index, tag); - list = pnfs_choose_commit_list(req, dst); - nfs_list_add_request(req, list); - res++; - if (res == INT_MAX) - goto out; - } - } - /* for latency reduction */ - cond_resched_lock(&nfsi->vfs_inode.i_lock); - } -out: - return res; -} - int __init nfs_init_nfspagecache(void) { nfs_page_cachep = kmem_cache_create("nfs_page", diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 8088d51f495..ef92f676cf1 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -94,11 +94,10 @@ struct pnfs_layoutdriver_type { const struct nfs_pageio_ops *pg_read_ops; const struct nfs_pageio_ops *pg_write_ops; - /* Returns true if layoutdriver wants to divert this request to - * driver's commit routine. - */ - bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg); - struct list_head * (*choose_commit_list) (struct nfs_page *req); + struct list_head * (*choose_commit_list) (struct nfs_page *req, + struct pnfs_layout_segment *lseg); + struct pnfs_layout_segment *(*remove_commit_req) (struct nfs_page *req); + int (*scan_commit_lists) (struct inode *inode, int max); int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); /* @@ -262,20 +261,6 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss) return nfss->pnfs_curr_ld != NULL; } -static inline void -pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) -{ - if (lseg) { - struct pnfs_layoutdriver_type *ld; - - ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld; - if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) { - set_bit(PG_PNFS_COMMIT, &req->wb_flags); - req->wb_commit_lseg = get_lseg(lseg); - } - } -} - static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) { @@ -285,26 +270,38 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) } static inline struct list_head * -pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) +pnfs_choose_commit_list(struct nfs_page *req, struct pnfs_layout_segment *lseg) { + struct inode *inode = req->wb_context->dentry->d_inode; struct list_head *rv; - if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) { - struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode; - - set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); - rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req); - /* matched by ref taken when PG_PNFS_COMMIT is set */ - put_lseg(req->wb_commit_lseg); - } else - rv = mds; + if (lseg && NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list) + rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req, lseg); + else + rv = &NFS_I(inode)->commit_list; return rv; } -static inline void pnfs_clear_request_commit(struct nfs_page *req) +static inline struct pnfs_layout_segment * +pnfs_clear_request_commit(struct nfs_page *req) { - if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) - put_lseg(req->wb_commit_lseg); + struct inode *inode = req->wb_context->dentry->d_inode; + + if (NFS_SERVER(inode)->pnfs_curr_ld && + NFS_SERVER(inode)->pnfs_curr_ld->remove_commit_req) + return NFS_SERVER(inode)->pnfs_curr_ld->remove_commit_req(req); + else + return NULL; +} + +static inline int +pnfs_scan_commit_lists(struct inode *inode, int max) +{ + if (NFS_SERVER(inode)->pnfs_curr_ld && + NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists) + return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(inode, max); + else + return 0; } /* Should the pNFS client commit and return the layout upon a setattr */ @@ -400,11 +397,6 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st return false; } -static inline void -pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) -{ -} - static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) { @@ -412,13 +404,23 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) } static inline struct list_head * -pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) +pnfs_choose_commit_list(struct nfs_page *req, struct pnfs_layout_segment *lseg) { - return mds; + struct inode *inode = req->wb_context->dentry->d_inode; + + return &NFS_I(inode)->commit_list; } -static inline void pnfs_clear_request_commit(struct nfs_page *req) +static inline struct pnfs_layout_segment * +pnfs_clear_request_commit(struct nfs_page *req) { + return NULL; +} + +static inline int +pnfs_scan_commit_lists(struct inode *inode, int max) +{ + return 0; } static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fd8a4f07bc0..a630ad65d64 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -375,21 +375,14 @@ out_err: /* * Insert a write request into an inode */ -static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); - int error; - - error = radix_tree_preload(GFP_NOFS); - if (error != 0) - goto out; /* Lock the request! */ nfs_lock_request_dontget(req); spin_lock(&inode->i_lock); - error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); - BUG_ON(error); if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) inode->i_version++; set_bit(PG_MAPPED, &req->wb_flags); @@ -398,11 +391,10 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfsi->npages++; kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); - radix_tree_preload_end(); -out: - return error; } +static struct pnfs_layout_segment *nfs_clear_request_commit(struct nfs_page *req); + /* * Remove a write request from an inode */ @@ -410,16 +402,18 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); + struct pnfs_layout_segment *lseg; BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&inode->i_lock); + lseg = nfs_clear_request_commit(req); set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); clear_bit(PG_MAPPED, &req->wb_flags); - radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; spin_unlock(&inode->i_lock); + put_lseg(lseg); nfs_release_request(req); } @@ -438,31 +432,38 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); + struct list_head *clist; + clist = pnfs_choose_commit_list(req, lseg); spin_lock(&inode->i_lock); set_bit(PG_CLEAN, &(req)->wb_flags); - radix_tree_tag_set(&nfsi->nfs_page_tree, - req->wb_index, - NFS_PAGE_TAG_COMMIT); + nfs_list_add_request(req, clist); nfsi->ncommit++; spin_unlock(&inode->i_lock); - pnfs_mark_request_commit(req, lseg); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } -static int +static void +nfs_clear_page_commit(struct page *page) +{ + dec_zone_page_state(page, NR_UNSTABLE_NFS); + dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); +} + +static struct pnfs_layout_segment * nfs_clear_request_commit(struct nfs_page *req) { - struct page *page = req->wb_page; + struct pnfs_layout_segment *lseg = NULL; if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { - dec_zone_page_state(page, NR_UNSTABLE_NFS); - dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); - return 1; + nfs_clear_page_commit(req->wb_page); + lseg = pnfs_clear_request_commit(req); + NFS_I(req->wb_context->dentry->d_inode)->ncommit--; + list_del(&req->wb_list); } - return 0; + return lseg; } static inline @@ -494,10 +495,10 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { } -static inline int +static inline struct pnfs_layout_segment * nfs_clear_request_commit(struct nfs_page *req) { - return 0; + return NULL; } static inline @@ -518,46 +519,67 @@ int nfs_reschedule_unstable_write(struct nfs_page *req, static int nfs_need_commit(struct nfs_inode *nfsi) { - return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT); + return nfsi->ncommit > 0; } +/* i_lock held by caller */ +int +nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max) +{ + struct nfs_page *req, *tmp; + int ret = 0; + + list_for_each_entry_safe(req, tmp, src, wb_list) { + if (nfs_lock_request_dontget(req)) { + kref_get(&req->wb_kref); + list_move_tail(&req->wb_list, dst); + clear_bit(PG_CLEAN, &(req)->wb_flags); + ret++; + if (ret == max) + break; + } + } + return ret; +} +EXPORT_SYMBOL_GPL(nfs_scan_commit_list); + /* * nfs_scan_commit - Scan an inode for commit requests * @inode: NFS inode to scan * @dst: destination list - * @idx_start: lower bound of page->index to scan. - * @npages: idx_start + npages sets the upper bound to scan. * * Moves requests from the inode's 'commit' request list. * The requests are *not* checked to ensure that they form a contiguous set. */ static int -nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) +nfs_scan_commit(struct inode *inode, struct list_head *dst) { struct nfs_inode *nfsi = NFS_I(inode); - int ret; - - if (!nfs_need_commit(nfsi)) - return 0; + int ret = 0; spin_lock(&inode->i_lock); - ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); - if (ret > 0) + if (nfsi->ncommit > 0) { + int pnfs_ret; + + ret = nfs_scan_commit_list(&nfsi->commit_list, dst, INT_MAX); + pnfs_ret = pnfs_scan_commit_lists(inode, INT_MAX - ret); + if (pnfs_ret) { + ret += pnfs_ret; + set_bit(NFS_INO_PNFS_COMMIT, &nfsi->flags); + } nfsi->ncommit -= ret; + } spin_unlock(&inode->i_lock); - - if (nfs_need_commit(NFS_I(inode))) - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - return ret; } + #else static inline int nfs_need_commit(struct nfs_inode *nfsi) { return 0; } -static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) +static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst) { return 0; } @@ -579,6 +601,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, unsigned int rqend; unsigned int end; int error; + struct pnfs_layout_segment *lseg = NULL; if (!PagePrivate(page)) return NULL; @@ -614,12 +637,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); } - if (nfs_clear_request_commit(req) && - radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) { - NFS_I(inode)->ncommit--; - pnfs_clear_request_commit(req); - } + lseg = nfs_clear_request_commit(req); /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { @@ -632,6 +650,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, req->wb_bytes = rqend - req->wb_offset; out_unlock: spin_unlock(&inode->i_lock); + put_lseg(lseg); return req; out_flushme: spin_unlock(&inode->i_lock); @@ -653,7 +672,6 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, { struct inode *inode = page->mapping->host; struct nfs_page *req; - int error; req = nfs_try_to_update_request(inode, page, offset, bytes); if (req != NULL) @@ -661,11 +679,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, req = nfs_create_request(ctx, inode, page, offset, bytes); if (IS_ERR(req)) goto out; - error = nfs_inode_add_request(inode, req); - if (error != 0) { - nfs_release_request(req); - req = ERR_PTR(error); - } + nfs_inode_add_request(inode, req); out: return req; } @@ -1458,7 +1472,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data) while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - nfs_clear_request_commit(req); + nfs_clear_page_commit(req->wb_page); dprintk("NFS: commit (%s/%lld %d@%lld)", req->wb_context->dentry->d_sb->s_id, @@ -1515,7 +1529,7 @@ int nfs_commit_inode(struct inode *inode, int how) res = nfs_commit_set_lock(NFS_I(inode), may_wait); if (res <= 0) goto out_mark_dirty; - res = nfs_scan_commit(inode, &head, 0, 0); + res = nfs_scan_commit(inode, &head); if (res) { int error; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index ce8e4361ad1..0a63ab2b5a7 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -171,13 +171,9 @@ struct nfs_inode { */ __be32 cookieverf[2]; - /* - * This is the list of dirty unwritten pages. - */ - struct radix_tree_root nfs_page_tree; - unsigned long npages; unsigned long ncommit; + struct list_head commit_list; /* Open contexts for shared mmap writes */ struct list_head open_files; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 65b563f0903..50856e9c1e5 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -18,11 +18,6 @@ #include -/* - * Valid flags for the radix tree - */ -#define NFS_PAGE_TAG_COMMIT 1 - /* * Valid flags for a dirty buffer */ @@ -32,16 +27,12 @@ enum { PG_CLEAN, PG_NEED_COMMIT, PG_NEED_RESCHED, - PG_PNFS_COMMIT, PG_PARTIAL_READ_FAILED, }; struct nfs_inode; struct nfs_page { - union { - struct list_head wb_list; /* Defines state of page: */ - struct pnfs_layout_segment *wb_commit_lseg; /* Used when PG_PNFS_COMMIT set */ - }; + struct list_head wb_list; /* Defines state of page: */ struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ struct nfs_lock_context *wb_lock_context; /* lock context info */ @@ -89,8 +80,6 @@ extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, extern void nfs_release_request(struct nfs_page *req); -extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, - pgoff_t idx_start, unsigned int npages, int tag); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, const struct nfs_pageio_ops *pg_ops, -- cgit v1.2.3-70-g09d2 From 8dd3775889345850ecddd689b5c200cdd91bd8c9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 15 Mar 2012 17:16:40 -0400 Subject: NFSv4.1: Clean ups and bugfixes for the pNFS read/writeback/commit code Move more pnfs-isms out of the generic commit code. Bugfixes: - filelayout_scan_commit_lists doesn't need to get/put the lseg. In fact since it is run under the inode->i_lock, the lseg_put() can deadlock. - Ensure that we distinguish between what needs to be done for commit-to-data server and what needs to be done for commit-to-MDS using the new flag PG_COMMIT_TO_DS. Otherwise we may end up calling put_lseg() on a bucket for a struct nfs_page that got written through the MDS. - Fix a case where we were using list_del() on an nfs_page->wb_list instead of list_del_init(). - filelayout_initiate_commit needs to call filelayout_commit_release on error instead of the mds_ops->rpc_release(). Otherwise it won't clear the commit lock. Cleanups: - Let the files layout manage the commit lists for the pNFS case. Don't expose stuff like pnfs_choose_commit_list, and the fact that the commit buckets hold references to the layout segment in common code. - Cast out the put_lseg() calls for the struct nfs_read/write_data->lseg into the pNFS layer from whence they came. - Let the pNFS layer manage the NFS_INO_PNFS_COMMIT bit. Signed-off-by: Trond Myklebust Cc: Fred Isaman --- fs/nfs/internal.h | 4 +- fs/nfs/nfs4filelayout.c | 82 +++++++++++++++++++++++-------- fs/nfs/pnfs.c | 3 ++ fs/nfs/pnfs.h | 55 ++++++++++----------- fs/nfs/read.c | 1 - fs/nfs/write.c | 124 +++++++++++++++++++++++++++++------------------ include/linux/nfs_page.h | 11 +++++ 7 files changed, 183 insertions(+), 97 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 04a914704e7..2476dc69365 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -308,8 +308,6 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ -extern int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, - int max); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, @@ -334,6 +332,8 @@ void nfs_retry_commit(struct list_head *page_list, void nfs_commit_clear_lock(struct nfs_inode *nfsi); void nfs_commitdata_release(void *data); void nfs_commit_release_pages(struct nfs_write_data *data); +void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); +void nfs_request_remove_commit_list(struct nfs_page *req); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 379a085f8f2..c24e077c282 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -224,6 +224,7 @@ static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = (struct nfs_read_data *)data; + put_lseg(rdata->lseg); rdata->mds_ops->rpc_release(data); } @@ -310,6 +311,7 @@ static void filelayout_write_release(void *data) { struct nfs_write_data *wdata = (struct nfs_write_data *)data; + put_lseg(wdata->lseg); wdata->mds_ops->rpc_release(data); } @@ -320,6 +322,7 @@ static void filelayout_commit_release(void *data) nfs_commit_release_pages(wdata); if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) nfs_commit_clear_lock(NFS_I(wdata->inode)); + put_lseg(wdata->lseg); nfs_commitdata_release(wdata); } @@ -779,11 +782,16 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) /* The generic layer is about to remove the req from the commit list. * If this will make the bucket empty, it will need to put the lseg reference. - * Note inode lock is held, so we can't do the put here. */ -static struct pnfs_layout_segment * -filelayout_remove_commit_req(struct nfs_page *req) +static void +filelayout_clear_request_commit(struct nfs_page *req) { + struct pnfs_layout_segment *freeme = NULL; + struct inode *inode = req->wb_context->dentry->d_inode; + + spin_lock(&inode->i_lock); + if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) + goto out; if (list_is_singular(&req->wb_list)) { struct inode *inode = req->wb_context->dentry->d_inode; struct pnfs_layout_segment *lseg; @@ -792,11 +800,16 @@ filelayout_remove_commit_req(struct nfs_page *req) * since there is only one relevant lseg... */ list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { - if (lseg->pls_range.iomode == IOMODE_RW) - return lseg; + if (lseg->pls_range.iomode == IOMODE_RW) { + freeme = lseg; + break; + } } } - return NULL; +out: + nfs_request_remove_commit_list(req); + spin_unlock(&inode->i_lock); + put_lseg(freeme); } static struct list_head * @@ -829,9 +842,20 @@ filelayout_choose_commit_list(struct nfs_page *req, */ get_lseg(lseg); } + set_bit(PG_COMMIT_TO_DS, &req->wb_flags); return list; } +static void +filelayout_mark_request_commit(struct nfs_page *req, + struct pnfs_layout_segment *lseg) +{ + struct list_head *list; + + list = filelayout_choose_commit_list(req, lseg); + nfs_request_add_commit_list(req, list); +} + static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) { struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); @@ -872,7 +896,7 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); prepare_to_resend_writes(data); - data->mds_ops->rpc_release(data); + filelayout_commit_release(data); return -EAGAIN; } dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); @@ -895,7 +919,7 @@ find_only_write_lseg_locked(struct inode *inode) list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) if (lseg->pls_range.iomode == IOMODE_RW) - return get_lseg(lseg); + return lseg; return NULL; } @@ -905,10 +929,33 @@ static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) spin_lock(&inode->i_lock); rv = find_only_write_lseg_locked(inode); + if (rv) + get_lseg(rv); spin_unlock(&inode->i_lock); return rv; } +static int +filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max) +{ + struct list_head *src = &bucket->written; + struct list_head *dst = &bucket->committing; + struct nfs_page *req, *tmp; + int ret = 0; + + list_for_each_entry_safe(req, tmp, src, wb_list) { + if (!nfs_lock_request(req)) + continue; + nfs_request_remove_commit_list(req); + clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); + nfs_list_add_request(req, dst); + ret++; + if (ret == max) + break; + } + return ret; +} + /* Move reqs from written to committing lists, returning count of number moved. * Note called with i_lock held. */ @@ -920,21 +967,16 @@ static int filelayout_scan_commit_lists(struct inode *inode, int max) lseg = find_only_write_lseg_locked(inode); if (!lseg) - return 0; + goto out_done; fl = FILELAYOUT_LSEG(lseg); if (fl->commit_through_mds) - goto out_put; - for (i = 0; i < fl->number_of_buckets; i++) { - if (list_empty(&fl->commit_buckets[i].written)) - continue; - cnt = nfs_scan_commit_list(&fl->commit_buckets[i].written, - &fl->commit_buckets[i].committing, - max); + goto out_done; + for (i = 0; i < fl->number_of_buckets && max != 0; i++) { + cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i], max); max -= cnt; rv += cnt; } -out_put: - put_lseg(lseg); +out_done: return rv; } @@ -1033,8 +1075,8 @@ static struct pnfs_layoutdriver_type filelayout_type = { .free_lseg = filelayout_free_lseg, .pg_read_ops = &filelayout_pg_read_ops, .pg_write_ops = &filelayout_pg_write_ops, - .choose_commit_list = filelayout_choose_commit_list, - .remove_commit_req = filelayout_remove_commit_req, + .mark_request_commit = filelayout_mark_request_commit, + .clear_request_commit = filelayout_clear_request_commit, .scan_commit_lists = filelayout_scan_commit_lists, .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6f1c1e3d12b..b5d45158694 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1210,6 +1210,7 @@ void pnfs_ld_write_done(struct nfs_write_data *data) } data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); } + put_lseg(data->lseg); data->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); @@ -1223,6 +1224,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_list_add_request(data->req, &desc->pg_list); nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; + put_lseg(data->lseg); nfs_writedata_release(data); } @@ -1323,6 +1325,7 @@ void pnfs_ld_read_done(struct nfs_read_data *data) data->mds_ops->rpc_call_done(&data->task, data); } else pnfs_ld_handle_read_error(data); + put_lseg(data->lseg); data->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index ef92f676cf1..e98ff3027d3 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -94,9 +94,9 @@ struct pnfs_layoutdriver_type { const struct nfs_pageio_ops *pg_read_ops; const struct nfs_pageio_ops *pg_write_ops; - struct list_head * (*choose_commit_list) (struct nfs_page *req, + void (*mark_request_commit) (struct nfs_page *req, struct pnfs_layout_segment *lseg); - struct pnfs_layout_segment *(*remove_commit_req) (struct nfs_page *req); + void (*clear_request_commit) (struct nfs_page *req); int (*scan_commit_lists) (struct inode *inode, int max); int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); @@ -269,39 +269,42 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); } -static inline struct list_head * -pnfs_choose_commit_list(struct nfs_page *req, struct pnfs_layout_segment *lseg) +static inline bool +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { struct inode *inode = req->wb_context->dentry->d_inode; - struct list_head *rv; + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - if (lseg && NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list) - rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req, lseg); - else - rv = &NFS_I(inode)->commit_list; - return rv; + if (lseg == NULL || ld->mark_request_commit == NULL) + return false; + ld->mark_request_commit(req, lseg); + return true; } -static inline struct pnfs_layout_segment * +static inline bool pnfs_clear_request_commit(struct nfs_page *req) { struct inode *inode = req->wb_context->dentry->d_inode; + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - if (NFS_SERVER(inode)->pnfs_curr_ld && - NFS_SERVER(inode)->pnfs_curr_ld->remove_commit_req) - return NFS_SERVER(inode)->pnfs_curr_ld->remove_commit_req(req); - else - return NULL; + if (ld == NULL || ld->clear_request_commit == NULL) + return false; + ld->clear_request_commit(req); + return true; } static inline int pnfs_scan_commit_lists(struct inode *inode, int max) { - if (NFS_SERVER(inode)->pnfs_curr_ld && - NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists) - return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(inode, max); - else + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + int ret; + + if (ld == NULL || ld->scan_commit_lists == NULL) return 0; + ret = ld->scan_commit_lists(inode, max); + if (ret != 0) + set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); + return ret; } /* Should the pNFS client commit and return the layout upon a setattr */ @@ -403,18 +406,16 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) return PNFS_NOT_ATTEMPTED; } -static inline struct list_head * -pnfs_choose_commit_list(struct nfs_page *req, struct pnfs_layout_segment *lseg) +static inline bool +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { - struct inode *inode = req->wb_context->dentry->d_inode; - - return &NFS_I(inode)->commit_list; + return false; } -static inline struct pnfs_layout_segment * +static inline bool pnfs_clear_request_commit(struct nfs_page *req) { - return NULL; + return false; } static inline int diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 3c2540d532c..2662c0298dd 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -66,7 +66,6 @@ void nfs_readdata_free(struct nfs_read_data *p) void nfs_readdata_release(struct nfs_read_data *rdata) { - put_lseg(rdata->lseg); put_nfs_open_context(rdata->args.context); nfs_readdata_free(rdata); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a630ad65d64..0de19f413f9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -100,7 +100,6 @@ void nfs_writedata_free(struct nfs_write_data *p) void nfs_writedata_release(struct nfs_write_data *wdata) { - put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); nfs_writedata_free(wdata); } @@ -393,8 +392,6 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) spin_unlock(&inode->i_lock); } -static struct pnfs_layout_segment *nfs_clear_request_commit(struct nfs_page *req); - /* * Remove a write request from an inode */ @@ -402,18 +399,15 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); - struct pnfs_layout_segment *lseg; BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&inode->i_lock); - lseg = nfs_clear_request_commit(req); set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); clear_bit(PG_MAPPED, &req->wb_flags); nfsi->npages--; spin_unlock(&inode->i_lock); - put_lseg(lseg); nfs_release_request(req); } @@ -424,26 +418,69 @@ nfs_mark_request_dirty(struct nfs_page *req) } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -/* - * Add a request to the inode's commit list. +/** + * nfs_request_add_commit_list - add request to a commit list + * @req: pointer to a struct nfs_page + * @head: commit list head + * + * This sets the PG_CLEAN bit, updates the inode global count of + * number of outstanding requests requiring a commit as well as + * the MM page stats. + * + * The caller must _not_ hold the inode->i_lock, but must be + * holding the nfs_page lock. */ -static void -nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +void +nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head) { struct inode *inode = req->wb_context->dentry->d_inode; - struct nfs_inode *nfsi = NFS_I(inode); - struct list_head *clist; - clist = pnfs_choose_commit_list(req, lseg); - spin_lock(&inode->i_lock); set_bit(PG_CLEAN, &(req)->wb_flags); - nfs_list_add_request(req, clist); - nfsi->ncommit++; + spin_lock(&inode->i_lock); + nfs_list_add_request(req, head); + NFS_I(inode)->ncommit++; spin_unlock(&inode->i_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } +EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); + +/** + * nfs_request_remove_commit_list - Remove request from a commit list + * @req: pointer to a nfs_page + * + * This clears the PG_CLEAN bit, and updates the inode global count of + * number of outstanding requests requiring a commit + * It does not update the MM page stats. + * + * The caller _must_ hold the inode->i_lock and the nfs_page lock. + */ +void +nfs_request_remove_commit_list(struct nfs_page *req) +{ + struct inode *inode = req->wb_context->dentry->d_inode; + + if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) + return; + nfs_list_remove_request(req); + NFS_I(inode)->ncommit--; +} +EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); + + +/* + * Add a request to the inode's commit list. + */ +static void +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +{ + struct inode *inode = req->wb_context->dentry->d_inode; + + if (pnfs_mark_request_commit(req, lseg)) + return; + nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list); +} static void nfs_clear_page_commit(struct page *page) @@ -452,18 +489,19 @@ nfs_clear_page_commit(struct page *page) dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); } -static struct pnfs_layout_segment * +static void nfs_clear_request_commit(struct nfs_page *req) { - struct pnfs_layout_segment *lseg = NULL; + if (test_bit(PG_CLEAN, &req->wb_flags)) { + struct inode *inode = req->wb_context->dentry->d_inode; - if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { + if (!pnfs_clear_request_commit(req)) { + spin_lock(&inode->i_lock); + nfs_request_remove_commit_list(req); + spin_unlock(&inode->i_lock); + } nfs_clear_page_commit(req->wb_page); - lseg = pnfs_clear_request_commit(req); - NFS_I(req->wb_context->dentry->d_inode)->ncommit--; - list_del(&req->wb_list); } - return lseg; } static inline @@ -490,15 +528,14 @@ int nfs_reschedule_unstable_write(struct nfs_page *req, return 0; } #else -static inline void +static void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { } -static inline struct pnfs_layout_segment * +static void nfs_clear_request_commit(struct nfs_page *req) { - return NULL; } static inline @@ -523,25 +560,23 @@ nfs_need_commit(struct nfs_inode *nfsi) } /* i_lock held by caller */ -int +static int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max) { struct nfs_page *req, *tmp; int ret = 0; list_for_each_entry_safe(req, tmp, src, wb_list) { - if (nfs_lock_request_dontget(req)) { - kref_get(&req->wb_kref); - list_move_tail(&req->wb_list, dst); - clear_bit(PG_CLEAN, &(req)->wb_flags); - ret++; - if (ret == max) - break; - } + if (!nfs_lock_request(req)) + continue; + nfs_request_remove_commit_list(req); + nfs_list_add_request(req, dst); + ret++; + if (ret == max) + break; } return ret; } -EXPORT_SYMBOL_GPL(nfs_scan_commit_list); /* * nfs_scan_commit - Scan an inode for commit requests @@ -559,14 +594,12 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst) spin_lock(&inode->i_lock); if (nfsi->ncommit > 0) { + const int max = INT_MAX; int pnfs_ret; - ret = nfs_scan_commit_list(&nfsi->commit_list, dst, INT_MAX); - pnfs_ret = pnfs_scan_commit_lists(inode, INT_MAX - ret); - if (pnfs_ret) { - ret += pnfs_ret; - set_bit(NFS_INO_PNFS_COMMIT, &nfsi->flags); - } + ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max); + pnfs_ret = pnfs_scan_commit_lists(inode, max - ret); + ret += pnfs_ret; nfsi->ncommit -= ret; } spin_unlock(&inode->i_lock); @@ -601,7 +634,6 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, unsigned int rqend; unsigned int end; int error; - struct pnfs_layout_segment *lseg = NULL; if (!PagePrivate(page)) return NULL; @@ -637,8 +669,6 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); } - lseg = nfs_clear_request_commit(req); - /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { req->wb_offset = offset; @@ -650,7 +680,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, req->wb_bytes = rqend - req->wb_offset; out_unlock: spin_unlock(&inode->i_lock); - put_lseg(lseg); + nfs_clear_request_commit(req); return req; out_flushme: spin_unlock(&inode->i_lock); @@ -1337,7 +1367,6 @@ void nfs_commitdata_release(void *data) { struct nfs_write_data *wdata = data; - put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); nfs_commit_free(wdata); } @@ -1647,6 +1676,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) if (req == NULL) break; if (nfs_lock_request_dontget(req)) { + nfs_clear_request_commit(req); nfs_inode_remove_request(req); /* * In case nfs_inode_remove_request has marked the diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 50856e9c1e5..eac30d6bec1 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -28,6 +28,7 @@ enum { PG_NEED_COMMIT, PG_NEED_RESCHED, PG_PARTIAL_READ_FAILED, + PG_COMMIT_TO_DS, }; struct nfs_inode; @@ -104,6 +105,16 @@ nfs_lock_request_dontget(struct nfs_page *req) return !test_and_set_bit(PG_BUSY, &req->wb_flags); } +static inline int +nfs_lock_request(struct nfs_page *req) +{ + if (test_and_set_bit(PG_BUSY, &req->wb_flags)) + return 0; + kref_get(&req->wb_kref); + return 1; +} + + /** * nfs_list_add_request - Insert a request into a list * @req: request -- cgit v1.2.3-70-g09d2