From 5a421ce3c062a87db0a9e7f2a0a7ee0a5b869aab Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 10 Jul 2009 12:37:40 +0300 Subject: nfsd41: gather and report statistics also for v4.1 ops Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- include/linux/nfs4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index bd2eba53066..aff924a24ab 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -234,7 +234,7 @@ enum nfs_opnum4 { Needs to be updated if more operations are defined in future.*/ #define FIRST_NFS4_OP OP_ACCESS -#define LAST_NFS4_OP OP_RELEASE_LOCKOWNER +#define LAST_NFS4_OP OP_RECLAIM_COMPLETE enum nfsstat4 { NFS4_OK = 0, -- cgit v1.2.3-70-g09d2 From 4bd9b0f4afc76cf972578c702e1bc1b6f2d10ba5 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 24 Jun 2009 15:37:45 -0400 Subject: nfsd41: use globals for DRC limits The version 4.1 DRC memory limit and tracking variables are server wide and session specific. Replace struct svc_serv fields with globals. Stop using the svc_serv sv_lock. Add a spinlock to serialize access to the DRC limit management variables which change on session creation and deletion (usage counter) or (future) administrative action to adjust the total DRC memory limit. Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy --- fs/nfsd/nfs4state.c | 10 +++++----- fs/nfsd/nfssvc.c | 19 +++++++++++++++---- include/linux/nfsd/nfsd.h | 3 +++ include/linux/sunrpc/svc.h | 2 -- 4 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 980a216a48c..2e6a44e3d2f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -430,11 +430,11 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - spin_lock(&nfsd_serv->sv_lock); - if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) - np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; - nfsd_serv->sv_drc_pages_used += np; - spin_unlock(&nfsd_serv->sv_lock); + spin_lock(&nfsd_drc_lock); + if (np + nfsd_drc_pages_used > nfsd_drc_max_pages) + np = nfsd_drc_max_pages - nfsd_drc_pages_used; + nfsd_drc_pages_used += np; + spin_unlock(&nfsd_drc_lock); if (np <= 0) { status = nfserr_resource; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d4c9884cd54..78d8fcd883f 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -67,6 +67,16 @@ struct timeval nfssvc_boot; DEFINE_MUTEX(nfsd_mutex); struct svc_serv *nfsd_serv; +/* + * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. + * nfsd_drc_max_pages limits the total amount of memory available for + * version 4.1 DRC caches. + * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. + */ +spinlock_t nfsd_drc_lock; +unsigned int nfsd_drc_max_pages; +unsigned int nfsd_drc_pages_used; + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static struct svc_version * nfsd_acl_version[] = { @@ -238,11 +248,12 @@ static void set_max_drc(void) { /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ #define NFSD_DRC_SIZE_SHIFT 7 - nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() + nfsd_drc_max_pages = nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT; - nfsd_serv->sv_drc_pages_used = 0; - dprintk("%s svc_drc_max_pages %u\n", __func__, - nfsd_serv->sv_drc_max_pages); + nfsd_drc_pages_used = 0; + spin_lock_init(&nfsd_drc_lock); + dprintk("%s nfsd_drc_max_pages %u\n", __func__, + nfsd_drc_max_pages); } int nfsd_create_serv(void) diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 2b49d676d0c..2571f856908 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -56,6 +56,9 @@ extern struct svc_version nfsd_version2, nfsd_version3, extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern struct svc_serv *nfsd_serv; +extern spinlock_t nfsd_drc_lock; +extern unsigned int nfsd_drc_max_pages; +extern unsigned int nfsd_drc_pages_used; extern struct seq_operations nfs_exports_op; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index ea8009695c6..52e8cb0a756 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -94,8 +94,6 @@ struct svc_serv { struct module * sv_module; /* optional module to count when * adding threads */ svc_thread_fn sv_function; /* main function for threads */ - unsigned int sv_drc_max_pages; /* Total pages for DRC */ - unsigned int sv_drc_pages_used;/* DRC pages used */ #if defined(CONFIG_NFS_V4_1) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same -- cgit v1.2.3-70-g09d2 From 0c193054a4c1cf190d2f23e5e91bd14402e43912 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 27 Jul 2009 19:09:19 -0400 Subject: nfsd41: hange from page to memory based drc limits NFSD_SLOT_CACHE_SIZE is the size of all encoded operation responses (excluding the sequence operation) that we want to cache. For now, keep NFSD_SLOT_CACHE_SIZE at PAGE_SIZE. It will be reduced when the DRC is changed from page based to memory based. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 28 +++++++++++++--------------- fs/nfsd/nfssvc.c | 13 ++++++------- include/linux/nfsd/nfsd.h | 4 ++-- include/linux/nfsd/state.h | 1 + 4 files changed, 22 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 70cba3fbfa6..e2b11b1b515 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -414,31 +414,31 @@ gen_sessionid(struct nfsd4_session *ses) /* * Give the client the number of slots it requests bound by - * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. + * NFSD_MAX_SLOTS_PER_SESSION and by nfsd_drc_max_mem. * - * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we - * should (up to a point) re-negotiate active sessions and reduce their - * slot usage to make rooom for new connections. For now we just fail the - * create session. + * If we run out of reserved DRC memory we should (up to a point) re-negotiate + * active sessions and reduce their slot usage to make rooom for new + * connections. For now we just fail the create session. */ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) { - int np; + int mem; if (fchan->maxreqs < 1) return nfserr_inval; else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; + mem = fchan->maxreqs * NFSD_SLOT_CACHE_SIZE; spin_lock(&nfsd_drc_lock); - if (np + nfsd_drc_pages_used > nfsd_drc_max_pages) - np = nfsd_drc_max_pages - nfsd_drc_pages_used; - nfsd_drc_pages_used += np; + if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) + mem = ((nfsd_drc_max_mem - nfsd_drc_mem_used) / + NFSD_SLOT_CACHE_SIZE) * NFSD_SLOT_CACHE_SIZE; + nfsd_drc_mem_used += mem; spin_unlock(&nfsd_drc_lock); - fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; + fchan->maxreqs = mem / NFSD_SLOT_CACHE_SIZE; if (fchan->maxreqs == 0) return nfserr_resource; return 0; @@ -465,9 +465,7 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, fchan->maxresp_sz = maxcount; session_fchan->maxresp_sz = fchan->maxresp_sz; - /* Set the max response cached size our default which is - * a multiple of PAGE_SIZE and small */ - session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; + session_fchan->maxresp_cached = NFSD_SLOT_CACHE_SIZE; fchan->maxresp_cached = session_fchan->maxresp_cached; /* Use the client's maxops if possible */ @@ -585,7 +583,7 @@ free_session(struct kref *kref) nfsd4_release_respages(e->ce_respages, e->ce_resused); } spin_lock(&nfsd_drc_lock); - nfsd_drc_pages_used -= ses->se_fchannel.maxreqs * NFSD_PAGES_PER_SLOT; + nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE; spin_unlock(&nfsd_drc_lock); kfree(ses); } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9be2a1932f8..5a280a9cb54 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -74,8 +74,8 @@ struct svc_serv *nfsd_serv; * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. */ spinlock_t nfsd_drc_lock; -unsigned int nfsd_drc_max_pages; -unsigned int nfsd_drc_pages_used; +unsigned int nfsd_drc_max_mem; +unsigned int nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; @@ -247,12 +247,11 @@ void nfsd_reset_versions(void) static void set_max_drc(void) { #define NFSD_DRC_SIZE_SHIFT 10 - nfsd_drc_max_pages = nr_free_buffer_pages() - >> NFSD_DRC_SIZE_SHIFT; - nfsd_drc_pages_used = 0; + nfsd_drc_max_mem = (nr_free_buffer_pages() + >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; + nfsd_drc_mem_used = 0; spin_lock_init(&nfsd_drc_lock); - dprintk("%s nfsd_drc_max_pages %u\n", __func__, - nfsd_drc_max_pages); + dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); } int nfsd_create_serv(void) diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 2571f856908..2812ed52669 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -57,8 +57,8 @@ extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern struct svc_serv *nfsd_serv; extern spinlock_t nfsd_drc_lock; -extern unsigned int nfsd_drc_max_pages; -extern unsigned int nfsd_drc_pages_used; +extern unsigned int nfsd_drc_max_mem; +extern unsigned int nfsd_drc_mem_used; extern struct seq_operations nfs_exports_op; diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 57ab2ed0845..a6c87d62389 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -96,6 +96,7 @@ struct nfs4_cb_conn { #define NFSD_MAX_SLOTS_PER_SESSION 128 /* Maximum number of pages per slot cache entry */ #define NFSD_PAGES_PER_SLOT 1 +#define NFSD_SLOT_CACHE_SIZE PAGE_SIZE /* Maximum number of operations per session compound */ #define NFSD_MAX_OPS_PER_COMPOUND 16 -- cgit v1.2.3-70-g09d2 From 49557cc74c7bdf6a984be227ead9a84b3a26f053 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 23 Jul 2009 19:02:16 -0400 Subject: nfsd41: Use separate DRC for setclientid Instead of trying to share the generic 4.1 reply cache code for the CREATE_SESSION reply cache, it's simpler to handle CREATE_SESSION separately. The nfs41 single slot clientid DRC holds the results of create session processing. CREATE_SESSION can be preceeded by a SEQUENCE operation (an embedded CREATE_SESSION) and the create session single slot cache must be maintained. nfsd4_replay_cache_entry() and nfsd4_store_cache_entry() do not implement the replay of an embedded CREATE_SESSION. The clientid DRC slot does not need the inuse, cachethis or other fields that the multiple slot session cache uses. Replace the clientid DRC cache struct nfs4_slot cache with a new nfsd4_clid_slot cache. Save the xdr struct nfsd4_create_session into the cache at the end of processing, and on a replay, replace the struct for the replay request with the cached version all while under the state lock. nfsd4_proc_compound will handle both the solo and embedded CREATE_SESSION case via the normal use of encode_operation. Errors that do not change the create session cache: A create session NFS4ERR_STALE_CLIENTID error means that a client record (and associated create session slot) could not be found and therefore can't be changed. NFSERR_SEQ_MISORDERED errors do not change the slot cache. All other errors get cached. Remove the clientid DRC specific check in nfs4svc_encode_compoundres to put the session only if cstate.session is set which will now always be true. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4state.c | 64 +++++++++++++++++++++++++++------------------- fs/nfsd/nfs4xdr.c | 3 +-- include/linux/nfsd/state.h | 21 ++++++++++++++- include/linux/nfsd/xdr4.h | 12 --------- 5 files changed, 60 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d781658e808..d606c6a427d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1120,7 +1120,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, BUG_ON(op->status == nfs_ok); encode_op: - /* Only from SEQUENCE or CREATE_SESSION */ + /* Only from SEQUENCE */ if (resp->cstate.status == nfserr_replay_cache) { dprintk("%s NFS4.1 replay from cache\n", __func__); if (nfsd4_not_cached(resp)) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 99df8e7a687..7729d092c8a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -653,8 +653,6 @@ static inline void free_client(struct nfs4_client *clp) { shutdown_callback_client(clp); - nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, - clp->cl_slot.sl_cache_entry.ce_resused); if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); kfree(clp->cl_principal); @@ -1293,12 +1291,11 @@ out_copy: exid->clientid.cl_boot = new->cl_clientid.cl_boot; exid->clientid.cl_id = new->cl_clientid.cl_id; - new->cl_slot.sl_seqid = 0; exid->seqid = 1; nfsd4_set_ex_flags(new, exid); dprintk("nfsd4_exchange_id seqid %d flags %x\n", - new->cl_slot.sl_seqid, new->cl_exchange_flags); + new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); status = nfs_ok; out: @@ -1334,15 +1331,35 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) return nfserr_seq_misordered; } +/* + * Cache the create session result into the create session single DRC + * slot cache by saving the xdr structure. sl_seqid has been set. + * Do this for solo or embedded create session operations. + */ +static void +nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot, int nfserr) +{ + slot->sl_status = nfserr; + memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); +} + +static __be32 +nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot) +{ + memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses)); + return slot->sl_status; +} + __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_create_session *cr_ses) { u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; - struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfs4_client *conf, *unconf; - struct nfsd4_slot *slot = NULL; + struct nfsd4_clid_slot *cs_slot = NULL; int status = 0; nfs4_lock_state(); @@ -1350,25 +1367,22 @@ nfsd4_create_session(struct svc_rqst *rqstp, conf = find_confirmed_client(&cr_ses->clientid); if (conf) { - slot = &conf->cl_slot; - status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid, - slot->sl_inuse); + cs_slot = &conf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status == nfserr_replay_cache) { dprintk("Got a create_session replay! seqid= %d\n", - slot->sl_seqid); - cstate->slot = slot; - cstate->status = status; + cs_slot->sl_seqid); /* Return the cached reply status */ - status = nfsd4_replay_cache_entry(resp, NULL); + status = nfsd4_replay_create_session(cr_ses, cs_slot); goto out; - } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { + } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { status = nfserr_seq_misordered; dprintk("Sequence misordered!\n"); dprintk("Expected seqid= %d but got seqid= %d\n", - slot->sl_seqid, cr_ses->seqid); + cs_slot->sl_seqid, cr_ses->seqid); goto out; } - conf->cl_slot.sl_seqid++; + cs_slot->sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || (ip_addr != unconf->cl_addr)) { @@ -1376,16 +1390,15 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out; } - slot = &unconf->cl_slot; - status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid, - slot->sl_inuse); + cs_slot = &unconf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status) { /* an unconfirmed replay returns misordered */ status = nfserr_seq_misordered; - goto out; + goto out_cache; } - slot->sl_seqid++; /* from 0 to 1 */ + cs_slot->sl_seqid++; /* from 0 to 1 */ move_to_confirmed(unconf); /* @@ -1406,12 +1419,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); - cr_ses->seqid = slot->sl_seqid; + cr_ses->seqid = cs_slot->sl_seqid; - slot->sl_inuse = true; - cstate->slot = slot; - /* Ensure a page is used for the cache */ - slot->sl_cache_entry.ce_cachethis = 1; +out_cache: + /* cache solo and embedded create sessions under the state lock */ + nfsd4_cache_create_session(cr_ses, cs_slot, status); out: nfs4_unlock_state(); dprintk("%s returns %d\n", __func__, ntohl(status)); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2dcc7feaa6f..fdf632bf1cf 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3313,8 +3313,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); resp->cstate.slot->sl_inuse = 0; } - if (resp->cstate.session) - nfsd4_put_session(resp->cstate.session); + nfsd4_put_session(resp->cstate.session); } return 1; } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index a6c87d62389..58bb19784e1 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -127,6 +127,25 @@ struct nfsd4_channel_attrs { u32 rdma_attrs; }; +struct nfsd4_create_session { + clientid_t clientid; + struct nfs4_sessionid sessionid; + u32 seqid; + u32 flags; + struct nfsd4_channel_attrs fore_channel; + struct nfsd4_channel_attrs back_channel; + u32 callback_prog; + u32 uid; + u32 gid; +}; + +/* The single slot clientid cache structure */ +struct nfsd4_clid_slot { + u32 sl_seqid; + __be32 sl_status; + struct nfsd4_create_session sl_cr_ses; +}; + struct nfsd4_session { struct kref se_ref; struct list_head se_hash; /* hash by sessionid */ @@ -193,7 +212,7 @@ struct nfs4_client { /* for nfs41 */ struct list_head cl_sessions; - struct nfsd4_slot cl_slot; /* create_session slot */ + struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ u32 cl_exchange_flags; struct nfs4_sessionid cl_sessionid; }; diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 2bacf753506..5e4beb0deb8 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -366,18 +366,6 @@ struct nfsd4_exchange_id { int spa_how; }; -struct nfsd4_create_session { - clientid_t clientid; - struct nfs4_sessionid sessionid; - u32 seqid; - u32 flags; - struct nfsd4_channel_attrs fore_channel; - struct nfsd4_channel_attrs back_channel; - u32 callback_prog; - u32 uid; - u32 gid; -}; - struct nfsd4_sequence { struct nfs4_sessionid sessionid; /* request/response */ u32 seqid; /* request/response */ -- cgit v1.2.3-70-g09d2 From 4516fc0454e7ffe2f369e80045b23c2b32155004 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:54 -0400 Subject: sunrpc: add routine for comparing addresses lockd needs these sort of routines, as does the NFSv4 callback code. Move lockd's routines into common code and rename them so that they can be used by others. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/clntlock.c | 2 +- fs/lockd/host.c | 4 ++-- fs/lockd/mon.c | 2 +- fs/lockd/svcsubs.c | 2 +- include/linux/lockd/lockd.h | 43 ---------------------------------------- include/linux/sunrpc/clnt.h | 48 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 53 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 1f3b0fc0d35..fc9032dc886 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -166,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) */ if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) continue; - if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) + if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) continue; if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) continue; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 7cb076ac6b4..4600c2037b8 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -111,7 +111,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) */ chain = &nlm_hosts[nlm_hash_address(ni->sap)]; hlist_for_each_entry(host, pos, chain, h_hash) { - if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) + if (!rpc_cmp_addr(nlm_addr(host), ni->sap)) continue; /* See if we have an NSM handle for this client */ @@ -125,7 +125,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) if (host->h_server != ni->server) continue; if (ni->server && - !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) + !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap)) continue; /* Move to head of hash chain. */ diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 30c933188dd..f956651d0f6 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -209,7 +209,7 @@ static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap) struct nsm_handle *nsm; list_for_each_entry(nsm, &nsm_handles, sm_link) - if (nlm_cmp_addr(nsm_addr(nsm), sap)) + if (rpc_cmp_addr(nsm_addr(nsm), sap)) return nsm; return NULL; } diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 9e4d6aab611..ad478da7ca6 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); static int nlmsvc_match_ip(void *datap, struct nlm_host *host) { - return nlm_cmp_addr(nlm_srcaddr(host), datap); + return rpc_cmp_addr(nlm_srcaddr(host), datap); } /** diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c325b187966..e7a251a988c 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -338,49 +338,6 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) } } -static inline int __nlm_cmp_addr4(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; - const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; - return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; - const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; - return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); -} -#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ -static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - return 0; -} -#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ - -/* - * Compare two host addresses - * - * Return TRUE if the addresses are the same; otherwise FALSE. - */ -static inline int nlm_cmp_addr(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - if (sap1->sa_family == sap2->sa_family) { - switch (sap1->sa_family) { - case AF_INET: - return __nlm_cmp_addr4(sap1, sap2); - case AF_INET6: - return __nlm_cmp_addr6(sap1, sap2); - } - } - return 0; -} - /* * Compare two NLM locks. * When the second lock is of type F_UNLCK, this acts like a wildcard. diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ab3f6e90caa..b17df361be8 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -22,6 +22,7 @@ #include #include #include +#include struct rpc_inode; @@ -188,5 +189,52 @@ static inline void rpc_set_port(struct sockaddr *sap, #define IPV6_SCOPE_DELIMITER '%' #define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") +static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; + + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; + return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); +} +#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + return false; +} +#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ + +/** + * rpc_cmp_addr - compare the address portion of two sockaddrs. + * @sap1: first sockaddr + * @sap2: second sockaddr + * + * Just compares the family and address portion. Ignores port, scope, etc. + * Returns true if the addrs are equal, false if they aren't. + */ +static inline bool rpc_cmp_addr(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + if (sap1->sa_family == sap2->sa_family) { + switch (sap1->sa_family) { + case AF_INET: + return __rpc_cmp_addr4(sap1, sap2); + case AF_INET6: + return __rpc_cmp_addr6(sap1, sap2); + } + } + return false; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3-70-g09d2 From be3ad6b0b675fd1d6b48362ca30bdee75fbef6b4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:55 -0400 Subject: sunrpc: add common routine for copying address portion of a sockaddr Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/clnt.h | 50 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b17df361be8..044f531aee7 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -198,6 +198,17 @@ static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; } +static inline bool __rpc_copy_addr4(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in *ssin = (struct sockaddr_in *) src; + struct sockaddr_in *dsin = (struct sockaddr_in *) dst; + + dsin->sin_family = ssin->sin_family; + dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; + return true; +} + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) @@ -206,12 +217,29 @@ static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); } + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; + struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; + + dsin6->sin6_family = ssin6->sin6_family; + ipv6_addr_copy(&dsin6->sin6_addr, &ssin6->sin6_addr); + return true; +} #else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) { return false; } + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + return false; +} #endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ /** @@ -236,5 +264,27 @@ static inline bool rpc_cmp_addr(const struct sockaddr *sap1, return false; } +/** + * rpc_copy_addr - copy the address portion of one sockaddr to another + * @dst: destination sockaddr + * @src: source sockaddr + * + * Just copies the address portion and family. Ignores port, scope, etc. + * Caller is responsible for making certain that dst is large enough to hold + * the address in src. Returns true if address family is supported. Returns + * false otherwise. + */ +static inline bool rpc_copy_addr(struct sockaddr *dst, + const struct sockaddr *src) +{ + switch (src->sa_family) { + case AF_INET: + return __rpc_copy_addr4(dst, src); + case AF_INET6: + return __rpc_copy_addr6(dst, src); + } + return false; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3-70-g09d2 From 363168b4ea8ec26aeb982ac6024a09f907ecd27e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:56 -0400 Subject: nfsd: make nfs4_client->cl_addr a struct sockaddr_storage It's currently a __be32, which isn't big enough to hold an IPv6 address. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 32 +++++++++++++++++++------------- include/linux/nfsd/state.h | 2 +- 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9295c4b56bc..bfc14d879ea 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -55,6 +55,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -1220,13 +1221,15 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, int status; unsigned int strhashval; char dname[HEXDIR_LEN]; + char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + struct sockaddr *sa = svc_addr(rqstp); + rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " - " ip_addr=%u flags %x, spa_how %d\n", + "ip_addr=%s flags %x, spa_how %d\n", __func__, rqstp, exid, exid->clname.len, exid->clname.data, - ip_addr, exid->flags, exid->spa_how); + addr_str, exid->flags, exid->spa_how); if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) return nfserr_inval; @@ -1315,7 +1318,7 @@ out_new: copy_verf(new, &verf); copy_cred(&new->cl_cred, &rqstp->rq_cred); - new->cl_addr = ip_addr; + rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa); gen_clid(new); gen_confirm(new); add_to_unconfirmed(new, strhashval); @@ -1389,7 +1392,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_create_session *cr_ses) { - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; struct nfsd4_clid_slot *cs_slot = NULL; int status = 0; @@ -1417,7 +1420,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, cs_slot->sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || - (ip_addr != unconf->cl_addr)) { + !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { status = nfserr_clid_inuse; goto out; } @@ -1564,7 +1567,7 @@ __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid *setclid) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr *sa = svc_addr(rqstp); struct xdr_netobj clname = { .len = setclid->se_namelen, .data = setclid->se_name, @@ -1596,8 +1599,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* RFC 3530 14.2.33 CASE 0: */ status = nfserr_clid_inuse; if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { - dprintk("NFSD: setclientid: string in use by client" - " at %pI4\n", &conf->cl_addr); + char addr_str[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, + sizeof(addr_str)); + dprintk("NFSD: setclientid: string in use by client " + "at %s\n", addr_str); goto out; } } @@ -1659,7 +1665,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, gen_clid(new); } copy_verf(new, &clverifier); - new->cl_addr = sin->sin_addr.s_addr; + rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa); new->cl_flavor = rqstp->rq_flavor; princ = svc_gss_principal(rqstp); if (princ) { @@ -1693,7 +1699,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid_confirm *setclientid_confirm) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; @@ -1712,9 +1718,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, unconf = find_unconfirmed_client(clid); status = nfserr_clid_inuse; - if (conf && conf->cl_addr != sin->sin_addr.s_addr) + if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa)) goto out; - if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) + if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa)) goto out; /* diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 58bb19784e1..3510ddd4be4 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -200,7 +200,7 @@ struct nfs4_client { char cl_recdir[HEXDIR_LEN]; /* recovery dir */ nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ - __be32 cl_addr; /* client ipaddress */ + struct sockaddr_storage cl_addr; /* client ipaddress */ u32 cl_flavor; /* setclientid pseudoflavor */ char *cl_principal; /* setclientid principal name */ struct svc_cred cl_cred; /* setclientid principal */ -- cgit v1.2.3-70-g09d2 From aa9a4ec7707a5391cde556f3fa1b0eb4bca3bcf6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:57 -0400 Subject: nfsd: convert nfs4_cb_conn struct to hold address in sockaddr_storage ...rather than as a separate address and port fields. This will be necessary for implementing callbacks over IPv6. Also, convert gen_callback to use the standard rpcuaddr2sockaddr routine rather than its own private one. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 11 ++----- fs/nfsd/nfs4state.c | 81 ++++++---------------------------------------- include/linux/nfsd/state.h | 4 +-- 3 files changed, 13 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 3fd23f7acec..81d1c5285dc 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -377,7 +377,6 @@ static int max_cb_time(void) int setup_callback_client(struct nfs4_client *clp) { - struct sockaddr_in addr; struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_timeout timeparms = { .to_initval = max_cb_time(), @@ -385,8 +384,8 @@ int setup_callback_client(struct nfs4_client *clp) }; struct rpc_create_args args = { .protocol = IPPROTO_TCP, - .address = (struct sockaddr *)&addr, - .addrsize = sizeof(addr), + .address = (struct sockaddr *) &cb->cb_addr, + .addrsize = cb->cb_addrlen, .timeout = &timeparms, .program = &cb_program, .prognumber = cb->cb_prog, @@ -400,12 +399,6 @@ int setup_callback_client(struct nfs4_client *clp) if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; - /* Initialize address */ - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_port = htons(cb->cb_port); - addr.sin_addr.s_addr = htonl(cb->cb_addr); - /* Create RPC client */ client = rpc_create(&args); if (IS_ERR(client)) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bfc14d879ea..96a742308ce 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -897,76 +897,6 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, return NULL; } -/* a helper function for parse_callback */ -static int -parse_octet(unsigned int *lenp, char **addrp) -{ - unsigned int len = *lenp; - char *p = *addrp; - int n = -1; - char c; - - for (;;) { - if (!len) - break; - len--; - c = *p++; - if (c == '.') - break; - if ((c < '0') || (c > '9')) { - n = -1; - break; - } - if (n < 0) - n = 0; - n = (n * 10) + (c - '0'); - if (n > 255) { - n = -1; - break; - } - } - *lenp = len; - *addrp = p; - return n; -} - -/* parse and set the setclientid ipv4 callback address */ -static int -parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) -{ - int temp = 0; - u32 cbaddr = 0; - u16 cbport = 0; - u32 addrlen = addr_len; - char *addr = addr_val; - int i, shift; - - /* ipaddress */ - shift = 24; - for(i = 4; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; - } - cbaddr |= (temp << shift); - if (shift > 0) - shift -= 8; - } - *cbaddrp = cbaddr; - - /* port */ - shift = 8; - for(i = 2; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; - } - cbport |= (temp << shift); - if (shift > 0) - shift -= 8; - } - *cbportp = cbport; - return 1; -} - static void gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) { @@ -976,14 +906,21 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) goto out_err; - if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, - &cb->cb_addr, &cb->cb_port))) + cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, + se->se_callback_addr_len, + (struct sockaddr *) &cb->cb_addr, + sizeof(cb->cb_addr)); + + if (!cb->cb_addrlen || cb->cb_addr.ss_family != AF_INET) goto out_err; + cb->cb_minorversion = 0; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; return; out_err: + cb->cb_addr.ss_family = AF_UNSPEC; + cb->cb_addrlen = 0; dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " "will not receive delegations\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 3510ddd4be4..fb0c404c7c5 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -81,8 +81,8 @@ struct nfs4_delegation { /* client delegation callback info */ struct nfs4_cb_conn { /* SETCLIENTID info */ - u32 cb_addr; - unsigned short cb_port; + struct sockaddr_storage cb_addr; + size_t cb_addrlen; u32 cb_prog; u32 cb_minorversion; u32 cb_ident; /* minorversion 0 only */ -- cgit v1.2.3-70-g09d2 From fbf4665f41b02e757ab9d9198df65e319388e728 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:59 -0400 Subject: nfsd: populate sin6_scope_id on callback address with scopeid from rq_addr on SETCLIENTID call When a SETCLIENTID call comes in, one of the args given is the svc_rqst. This struct contains an rq_addr field which holds the address that sent the call. If this is an IPv6 address, then we can use the sin6_scope_id field in this address to populate the sin6_scope_id field in the callback address. AFAICT, the rq_addr.sin6_scope_id is non-zero if and only if the client mounted the server's link-local address. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 7 +++++-- include/linux/sunrpc/clnt.h | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9ec0ca1ef4e..d2a05248090 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -898,7 +898,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, } static void -gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) +gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) { struct nfs4_cb_conn *cb = &clp->cl_cb_conn; unsigned short expected_family; @@ -921,6 +921,9 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) goto out_err; + if (cb->cb_addr.ss_family == AF_INET6) + ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; + cb->cb_minorversion = 0; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; @@ -1621,7 +1624,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } copy_cred(&new->cl_cred, &rqstp->rq_cred); gen_confirm(new); - gen_callback(new, setclid); + gen_callback(new, setclid, rpc_get_scope_id(sa)); add_to_unconfirmed(new, strhashval); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 044f531aee7..3d025588e56 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -286,5 +286,20 @@ static inline bool rpc_copy_addr(struct sockaddr *dst, return false; } +/** + * rpc_get_scope_id - return scopeid for a given sockaddr + * @sa: sockaddr to get scopeid from + * + * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if + * not an AF_INET6 address. + */ +static inline u32 rpc_get_scope_id(const struct sockaddr *sa) +{ + if (sa->sa_family != AF_INET6) + return 0; + + return ((struct sockaddr_in6 *) sa)->sin6_scope_id; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3-70-g09d2 From 468de9e54a900559b55aa939a4daeaea1915e572 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 27 Aug 2009 12:07:40 -0400 Subject: nfsd41: expand solo sequence check Compounds consisting of only a sequence operation don't need any additional caching beyond the sequence information we store in the slot entry. Fix nfsd4_is_solo_sequence to identify this case correctly. The additional check for a failed sequence in nfsd4_store_cache_entry() is redundant, since the nfsd4_is_solo_sequence call lower down catches this case. The final ce_cachethis set in nfsd4_sequence is also redundant. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 9 --------- include/linux/nfsd/xdr4.h | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5f634d24861..b44a2cfde6f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -991,16 +991,10 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; struct svc_rqst *rqstp = resp->rqstp; - struct nfsd4_compoundargs *args = rqstp->rq_argp; - struct nfsd4_op *op = &args->ops[resp->opcnt]; struct kvec *resv = &rqstp->rq_res.head[0]; dprintk("--> %s entry %p\n", __func__, entry); - /* Don't cache a failed OP_SEQUENCE. */ - if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status) - return; - nfsd4_release_respages(entry->ce_respages, entry->ce_resused); entry->ce_opcnt = resp->opcnt; entry->ce_status = resp->cstate.status; @@ -1490,9 +1484,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, slot->sl_inuse = true; slot->sl_seqid = seq->seqid; slot->sl_cache_entry.ce_cachethis = seq->cachethis; - /* Always set the cache entry cachethis for solo sequence */ - if (nfsd4_is_solo_sequence(resp)) - slot->sl_cache_entry.ce_cachethis = 1; cstate->slot = slot; cstate->session = session; diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 5e4beb0deb8..3f716607c86 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -467,7 +467,7 @@ struct nfsd4_compoundres { static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) { struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; - return args->opcnt == 1; + return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; } static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) -- cgit v1.2.3-70-g09d2 From a649637c73a36174287a403cdda7607177d64523 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 28 Aug 2009 08:45:01 -0400 Subject: nfsd41: bound forechannel drc size by memory usage By using the requested ca_maxresponsesize_cached * ca_maxresponses to bound a forechannel drc request size, clients can tailor a session to usage. For example, an I/O session (READ/WRITE only) can have a much smaller ca_maxresponsesize_cached (for only WRITE compound responses) and a lot larger ca_maxresponses to service a large in-flight data window. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 66 +++++++++++++++++++++++++++++++++------------- include/linux/nfsd/state.h | 8 ++++-- 2 files changed, 54 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b44a2cfde6f..02b3ddd0bee 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -414,34 +414,64 @@ gen_sessionid(struct nfsd4_session *ses) } /* - * Give the client the number of slots it requests bound by - * NFSD_MAX_SLOTS_PER_SESSION and by nfsd_drc_max_mem. + * The protocol defines ca_maxresponssize_cached to include the size of + * the rpc header, but all we need to cache is the data starting after + * the end of the initial SEQUENCE operation--the rest we regenerate + * each time. Therefore we can advertise a ca_maxresponssize_cached + * value that is the number of bytes in our cache plus a few additional + * bytes. In order to stay on the safe side, and not promise more than + * we can cache, those additional bytes must be the minimum possible: 24 + * bytes of rpc header (xid through accept state, with AUTH_NULL + * verifier), 12 for the compound header (with zero-length tag), and 44 + * for the SEQUENCE op response: + */ +#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) + +/* + * Give the client the number of ca_maxresponsesize_cached slots it + * requests, of size bounded by NFSD_SLOT_CACHE_SIZE, + * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more + * than NFSD_MAX_SLOTS_PER_SESSION. * - * If we run out of reserved DRC memory we should (up to a point) re-negotiate - * active sessions and reduce their slot usage to make rooom for new - * connections. For now we just fail the create session. + * If we run out of reserved DRC memory we should (up to a point) + * re-negotiate active sessions and reduce their slot usage to make + * rooom for new connections. For now we just fail the create session. */ -static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) +static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan) { - int mem; + int mem, size = fchan->maxresp_cached; if (fchan->maxreqs < 1) return nfserr_inval; - else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) - fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - mem = fchan->maxreqs * NFSD_SLOT_CACHE_SIZE; + if (size < NFSD_MIN_HDR_SEQ_SZ) + size = NFSD_MIN_HDR_SEQ_SZ; + size -= NFSD_MIN_HDR_SEQ_SZ; + if (size > NFSD_SLOT_CACHE_SIZE) + size = NFSD_SLOT_CACHE_SIZE; + + /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */ + mem = fchan->maxreqs * size; + if (mem > NFSD_MAX_MEM_PER_SESSION) { + fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size; + if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) + fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; + mem = fchan->maxreqs * size; + } spin_lock(&nfsd_drc_lock); - if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) - mem = ((nfsd_drc_max_mem - nfsd_drc_mem_used) / - NFSD_SLOT_CACHE_SIZE) * NFSD_SLOT_CACHE_SIZE; + /* bound the total session drc memory ussage */ + if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) { + fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size; + mem = fchan->maxreqs * size; + } nfsd_drc_mem_used += mem; spin_unlock(&nfsd_drc_lock); - fchan->maxreqs = mem / NFSD_SLOT_CACHE_SIZE; if (fchan->maxreqs == 0) return nfserr_resource; + + fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; return 0; } @@ -466,9 +496,6 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, fchan->maxresp_sz = maxcount; session_fchan->maxresp_sz = fchan->maxresp_sz; - session_fchan->maxresp_cached = NFSD_SLOT_CACHE_SIZE; - fchan->maxresp_cached = session_fchan->maxresp_cached; - /* Use the client's maxops if possible */ if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; @@ -478,9 +505,12 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, * recover pages from existing sessions. For now fail session * creation. */ - status = set_forechannel_maxreqs(fchan); + status = set_forechannel_drc_size(fchan); + session_fchan->maxresp_cached = fchan->maxresp_cached; session_fchan->maxreqs = fchan->maxreqs; + + dprintk("%s status %d\n", __func__, status); return status; } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index fb0c404c7c5..ff0b771efde 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -92,13 +92,17 @@ struct nfs4_cb_conn { struct rpc_cred * cb_cred; }; -/* Maximum number of slots per session. 128 is useful for long haul TCP */ -#define NFSD_MAX_SLOTS_PER_SESSION 128 +/* Maximum number of slots per session. 160 is useful for long haul TCP */ +#define NFSD_MAX_SLOTS_PER_SESSION 160 /* Maximum number of pages per slot cache entry */ #define NFSD_PAGES_PER_SLOT 1 #define NFSD_SLOT_CACHE_SIZE PAGE_SIZE /* Maximum number of operations per session compound */ #define NFSD_MAX_OPS_PER_COMPOUND 16 +/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ +#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 +#define NFSD_MAX_MEM_PER_SESSION \ + (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE) struct nfsd4_cache_entry { __be32 ce_status; -- cgit v1.2.3-70-g09d2 From 557ce2646e775f6bda734dd92b10d4780874b9c7 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 28 Aug 2009 08:45:04 -0400 Subject: nfsd41: replace page based DRC with buffer based DRC Use NFSD_SLOT_CACHE_SIZE size buffers for sessions DRC instead of holding nfsd pages in cache. Connectathon testing has shown that 1024 bytes for encoded compound operation responses past the sequence operation is sufficient, 512 bytes is a little too small. Set NFSD_SLOT_CACHE_SIZE to 1024. Allocate memory for the session DRC in the CREATE_SESSION operation to guarantee that the memory resource is available for caching responses. Allocate each slot individually in preparation for slot table size negotiation. Remove struct nfsd4_cache_entry and helper functions for the old page-based DRC. The iov_len calculation in nfs4svc_encode_compoundres is now always correct. Replay is now done in nfsd4_sequence under the state lock, so the session ref count is only bumped on non-replay. Clean up the nfs4svc_encode_compoundres session logic. The nfsd4_compound_state statp pointer is also not used. Remove nfsd4_set_statp(). Move useful nfsd4_cache_entry fields into nfsd4_slot. Signed-off-by: Andy Adamson --- fs/nfsd/nfs4state.c | 211 +++++++++++++-------------------------------- fs/nfsd/nfs4xdr.c | 17 ++-- fs/nfsd/nfssvc.c | 4 - include/linux/nfsd/state.h | 27 +++--- include/linux/nfsd/xdr4.h | 5 +- 5 files changed, 79 insertions(+), 185 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c9a45f49019..46e9ac52687 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -514,12 +514,23 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, return status; } +static void +free_session_slots(struct nfsd4_session *ses) +{ + int i; + + for (i = 0; i < ses->se_fchannel.maxreqs; i++) + kfree(ses->se_slots[i]); +} + static int alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) { struct nfsd4_session *new, tmp; - int idx, status = nfserr_serverfault, slotsize; + struct nfsd4_slot *sp; + int idx, slotsize, cachesize, i; + int status; memset(&tmp, 0, sizeof(tmp)); @@ -530,14 +541,27 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, if (status) goto out; - /* allocate struct nfsd4_session and slot table in one piece */ - slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot); + BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) + + sizeof(struct nfsd4_session) > PAGE_SIZE); + + status = nfserr_serverfault; + /* allocate struct nfsd4_session and slot table pointers in one piece */ + slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *); new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); if (!new) goto out; memcpy(new, &tmp, sizeof(*new)); + /* allocate each struct nfsd4_slot and data cache in one piece */ + cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; + for (i = 0; i < new->se_fchannel.maxreqs; i++) { + sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); + if (!sp) + goto out_free; + new->se_slots[i] = sp; + } + new->se_client = clp; gen_sessionid(new); idx = hash_sessionid(&new->se_sessionid); @@ -554,6 +578,10 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, status = nfs_ok; out: return status; +out_free: + free_session_slots(new); + kfree(new); + goto out; } /* caller must hold sessionid_lock */ @@ -596,22 +624,16 @@ release_session(struct nfsd4_session *ses) nfsd4_put_session(ses); } -static void nfsd4_release_respages(struct page **respages, short resused); - void free_session(struct kref *kref) { struct nfsd4_session *ses; - int i; ses = container_of(kref, struct nfsd4_session, se_ref); - for (i = 0; i < ses->se_fchannel.maxreqs; i++) { - struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; - nfsd4_release_respages(e->ce_respages, e->ce_resused); - } spin_lock(&nfsd_drc_lock); nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE; spin_unlock(&nfsd_drc_lock); + free_session_slots(ses); kfree(ses); } @@ -968,116 +990,31 @@ out_err: return; } -void -nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) -{ - struct nfsd4_compoundres *resp = rqstp->rq_resp; - - resp->cstate.statp = statp; -} - -/* - * Dereference the result pages. - */ -static void -nfsd4_release_respages(struct page **respages, short resused) -{ - int i; - - dprintk("--> %s\n", __func__); - for (i = 0; i < resused; i++) { - if (!respages[i]) - continue; - put_page(respages[i]); - respages[i] = NULL; - } -} - -static void -nfsd4_copy_pages(struct page **topages, struct page **frompages, short count) -{ - int i; - - for (i = 0; i < count; i++) { - topages[i] = frompages[i]; - if (!topages[i]) - continue; - get_page(topages[i]); - } -} - /* - * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous - * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total - * length of the XDR response is less than se_fmaxresp_cached - * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages is used for a - * of the reply (e.g. readdir). - * - * Store the base and length of the rq_req.head[0] page - * of the NFSv4.1 data, just past the rpc header. + * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size. */ void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { - struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; - struct svc_rqst *rqstp = resp->rqstp; - struct kvec *resv = &rqstp->rq_res.head[0]; - - dprintk("--> %s entry %p\n", __func__, entry); + struct nfsd4_slot *slot = resp->cstate.slot; + unsigned int base; - nfsd4_release_respages(entry->ce_respages, entry->ce_resused); - entry->ce_opcnt = resp->opcnt; - entry->ce_status = resp->cstate.status; + dprintk("--> %s slot %p\n", __func__, slot); - /* - * Don't need a page to cache just the sequence operation - the slot - * does this for us! - */ + slot->sl_opcnt = resp->opcnt; + slot->sl_status = resp->cstate.status; if (nfsd4_not_cached(resp)) { - entry->ce_resused = 0; - entry->ce_rpchdrlen = 0; - dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__, - resp->cstate.slot->sl_cache_entry.ce_cachethis); + slot->sl_datalen = 0; return; } - entry->ce_resused = rqstp->rq_resused; - if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1) - entry->ce_resused = NFSD_PAGES_PER_SLOT + 1; - nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages, - entry->ce_resused); - entry->ce_datav.iov_base = resp->cstate.statp; - entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp - - (char *)page_address(rqstp->rq_respages[0])); - /* Current request rpc header length*/ - entry->ce_rpchdrlen = (char *)resp->cstate.statp - - (char *)page_address(rqstp->rq_respages[0]); -} - -/* - * We keep the rpc header, but take the nfs reply from the replycache. - */ -static int -nfsd41_copy_replay_data(struct nfsd4_compoundres *resp, - struct nfsd4_cache_entry *entry) -{ - struct svc_rqst *rqstp = resp->rqstp; - struct kvec *resv = &resp->rqstp->rq_res.head[0]; - int len; - - /* Current request rpc header length*/ - len = (char *)resp->cstate.statp - - (char *)page_address(rqstp->rq_respages[0]); - if (entry->ce_datav.iov_len + len > PAGE_SIZE) { - dprintk("%s v41 cached reply too large (%Zd).\n", __func__, - entry->ce_datav.iov_len); - return 0; - } - /* copy the cached reply nfsd data past the current rpc header */ - memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base, - entry->ce_datav.iov_len); - resv->iov_len = len + entry->ce_datav.iov_len; - return 1; + slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap; + base = (char *)resp->cstate.datap - + (char *)resp->xbuf->head[0].iov_base; + if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data, + slot->sl_datalen)) + WARN("%s: sessions DRC could not cache compound\n", __func__); + return; } /* @@ -1095,14 +1032,14 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, struct nfsd4_slot *slot = resp->cstate.slot; dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__, - resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis); + resp->opcnt, resp->cstate.slot->sl_cachethis); /* Encode the replayed sequence operation */ op = &args->ops[resp->opcnt - 1]; nfsd4_encode_operation(resp, op); /* Return nfserr_retry_uncached_rep in next operation. */ - if (args->opcnt > 1 && slot->sl_cache_entry.ce_cachethis == 0) { + if (args->opcnt > 1 && slot->sl_cachethis == 0) { op = &args->ops[resp->opcnt++]; op->status = nfserr_retry_uncached_rep; nfsd4_encode_operation(resp, op); @@ -1111,57 +1048,29 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, } /* - * Keep the first page of the replay. Copy the NFSv4.1 data from the first - * cached page. Replace any futher replay pages from the cache. + * The sequence operation is not cached because we can use the slot and + * session values. */ __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { - struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; + struct nfsd4_slot *slot = resp->cstate.slot; __be32 status; - dprintk("--> %s entry %p\n", __func__, entry); - - /* - * If this is just the sequence operation, we did not keep - * a page in the cache entry because we can just use the - * slot info stored in struct nfsd4_sequence that was checked - * against the slot in nfsd4_sequence(). - * - * This occurs when seq->cachethis is FALSE, or when the client - * session inactivity timer fires and a solo sequence operation - * is sent (lease renewal). - */ + dprintk("--> %s slot %p\n", __func__, slot); /* Either returns 0 or nfserr_retry_uncached */ status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); if (status == nfserr_retry_uncached_rep) return status; - if (!nfsd41_copy_replay_data(resp, entry)) { - /* - * Not enough room to use the replay rpc header, send the - * cached header. Release all the allocated result pages. - */ - svc_free_res_pages(resp->rqstp); - nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages, - entry->ce_resused); - } else { - /* Release all but the first allocated result page */ - - resp->rqstp->rq_resused--; - svc_free_res_pages(resp->rqstp); - - nfsd4_copy_pages(&resp->rqstp->rq_respages[1], - &entry->ce_respages[1], - entry->ce_resused - 1); - } + /* The sequence operation has been encoded, cstate->datap set. */ + memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen); - resp->rqstp->rq_resused = entry->ce_resused; - resp->opcnt = entry->ce_opcnt; - resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen; - status = entry->ce_status; + resp->opcnt = slot->sl_opcnt; + resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); + status = slot->sl_status; return status; } @@ -1493,7 +1402,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (seq->slotid >= session->se_fchannel.maxreqs) goto out; - slot = &session->se_slots[seq->slotid]; + slot = session->se_slots[seq->slotid]; dprintk("%s: slotid %d\n", __func__, seq->slotid); /* We do not negotiate the number of slots yet, so set the @@ -1506,7 +1415,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, cstate->slot = slot; cstate->session = session; /* Return the cached reply status and set cstate->status - * for nfsd4_svc_encode_compoundres processing */ + * for nfsd4_proc_compound processing */ status = nfsd4_replay_cache_entry(resp, seq); cstate->status = nfserr_replay_cache; goto out; @@ -1517,7 +1426,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, /* Success! bump slot seqid */ slot->sl_inuse = true; slot->sl_seqid = seq->seqid; - slot->sl_cache_entry.ce_cachethis = seq->cachethis; + slot->sl_cachethis = seq->cachethis; cstate->slot = slot; cstate->session = session; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 20c5e3db066..00ed16a1849 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3057,6 +3057,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, WRITE32(0); ADJUST_ARGS(); + resp->cstate.datap = p; /* DRC cache data pointer */ return 0; } @@ -3159,7 +3160,7 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp) return status; session = resp->cstate.session; - if (session == NULL || slot->sl_cache_entry.ce_cachethis == 0) + if (session == NULL || slot->sl_cachethis == 0) return status; if (resp->opcnt >= args->opcnt) @@ -3284,6 +3285,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo /* * All that remains is to write the tag and operation count... */ + struct nfsd4_compound_state *cs = &resp->cstate; struct kvec *iov; p = resp->tagp; *p++ = htonl(resp->taglen); @@ -3297,15 +3299,10 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo iov = &rqstp->rq_res.head[0]; iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; BUG_ON(iov->iov_len > PAGE_SIZE); - if (nfsd4_has_session(&resp->cstate)) { - if (resp->cstate.status == nfserr_replay_cache && - !nfsd4_not_cached(resp)) { - iov->iov_len = resp->cstate.iovlen; - } else { - nfsd4_store_cache_entry(resp); - dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); - resp->cstate.slot->sl_inuse = 0; - } + if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) { + nfsd4_store_cache_entry(resp); + dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); + resp->cstate.slot->sl_inuse = false; nfsd4_put_session(resp->cstate.session); } return 1; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 675d395c4ab..4472449c093 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -577,10 +577,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) + rqstp->rq_res.head[0].iov_len; rqstp->rq_res.head[0].iov_len += sizeof(__be32); - /* NFSv4.1 DRC requires statp */ - if (rqstp->rq_vers == 4) - nfsd4_set_statp(rqstp, statp); - /* Now call the procedure handler, and encode NFS status. */ nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); nfserr = map_new_errors(rqstp->rq_vers, nfserr); diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index ff0b771efde..70ef5f4abbb 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -94,30 +94,23 @@ struct nfs4_cb_conn { /* Maximum number of slots per session. 160 is useful for long haul TCP */ #define NFSD_MAX_SLOTS_PER_SESSION 160 -/* Maximum number of pages per slot cache entry */ -#define NFSD_PAGES_PER_SLOT 1 -#define NFSD_SLOT_CACHE_SIZE PAGE_SIZE /* Maximum number of operations per session compound */ #define NFSD_MAX_OPS_PER_COMPOUND 16 +/* Maximum session per slot cache size */ +#define NFSD_SLOT_CACHE_SIZE 1024 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 #define NFSD_MAX_MEM_PER_SESSION \ (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE) -struct nfsd4_cache_entry { - __be32 ce_status; - struct kvec ce_datav; /* encoded NFSv4.1 data in rq_res.head[0] */ - struct page *ce_respages[NFSD_PAGES_PER_SLOT + 1]; - int ce_cachethis; - short ce_resused; - int ce_opcnt; - int ce_rpchdrlen; -}; - struct nfsd4_slot { - bool sl_inuse; - u32 sl_seqid; - struct nfsd4_cache_entry sl_cache_entry; + bool sl_inuse; + bool sl_cachethis; + u16 sl_opcnt; + u32 sl_seqid; + __be32 sl_status; + u32 sl_datalen; + char sl_data[]; }; struct nfsd4_channel_attrs { @@ -159,7 +152,7 @@ struct nfsd4_session { struct nfs4_sessionid se_sessionid; struct nfsd4_channel_attrs se_fchannel; struct nfsd4_channel_attrs se_bchannel; - struct nfsd4_slot se_slots[]; /* forward channel slots */ + struct nfsd4_slot *se_slots[]; /* forward channel slots */ }; static inline void diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 3f716607c86..73164c2b3d2 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -51,7 +51,7 @@ struct nfsd4_compound_state { /* For sessions DRC */ struct nfsd4_session *session; struct nfsd4_slot *slot; - __be32 *statp; + __be32 *datap; size_t iovlen; u32 minorversion; u32 status; @@ -472,8 +472,7 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) { - return !resp->cstate.slot->sl_cache_entry.ce_cachethis || - nfsd4_is_solo_sequence(resp); + return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp); } #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) -- cgit v1.2.3-70-g09d2 From 4cfc7e6019caa3e97d2a81c48c8d575d7b38d751 Mon Sep 17 00:00:00 2001 From: Rahul Iyer Date: Thu, 10 Sep 2009 17:32:28 +0300 Subject: nfsd41: sunrpc: Added rpc server-side backchannel handling When the call direction is a reply, copy the xid and call direction into the req->rq_private_buf.head[0].iov_base otherwise rpc_verify_header returns rpc_garbage. Signed-off-by: Rahul Iyer Signed-off-by: Mike Sager Signed-off-by: Marc Eshel Signed-off-by: Benny Halevy Signed-off-by: Ricardo Labiaga Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy [get rid of CONFIG_NFSD_V4_1] [sunrpc: refactoring of svc_tcp_recvfrom] [nfsd41: sunrpc: create common send routine for the fore and the back channels] [nfsd41: sunrpc: Use free_page() to free server backchannel pages] [nfsd41: sunrpc: Document server backchannel locking] [nfsd41: sunrpc: remove bc_connect_worker()] [nfsd41: sunrpc: Define xprt_server_backchannel()[ [nfsd41: sunrpc: remove bc_close and bc_init_auto_disconnect dummy functions] [nfsd41: sunrpc: eliminate unneeded switch statement in xs_setup_tcp()] [nfsd41: sunrpc: Don't auto close the server backchannel connection] [nfsd41: sunrpc: Remove unused functions] Signed-off-by: Alexandros Batsakis Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [nfsd41: change bc_sock to bc_xprt] [nfsd41: sunrpc: move struct rpc_buffer def into a common header file] [nfsd41: sunrpc: use rpc_sleep in bc_send_request so not to block on mutex] [removed cosmetic changes] Signed-off-by: Benny Halevy [sunrpc: add new xprt class for nfsv4.1 backchannel] [sunrpc: v2.1 change handling of auto_close and init_auto_disconnect operations for the nfsv4.1 backchannel] Signed-off-by: Alexandros Batsakis [reverted more cosmetic leftovers] [got rid of xprt_server_backchannel] [separated "nfsd41: sunrpc: add new xprt class for nfsv4.1 backchannel"] Signed-off-by: Benny Halevy Cc: Trond Myklebust [sunrpc: change idle timeout value for the backchannel] Signed-off-by: Alexandros Batsakis Signed-off-by: Benny Halevy Acked-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + include/linux/sunrpc/svcsock.h | 1 + include/linux/sunrpc/xprt.h | 1 + net/sunrpc/sunrpc.h | 4 + net/sunrpc/svc_xprt.c | 2 + net/sunrpc/svcsock.c | 172 ++++++++++++++++++++++++++++++++-------- net/sunrpc/xprt.c | 15 +++- net/sunrpc/xprtsock.c | 146 ++++++++++++++++++++++++++++++++++ 8 files changed, 303 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 2223ae0b5ed..5f4e18b3ce7 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -65,6 +65,7 @@ struct svc_xprt { size_t xpt_locallen; /* length of address */ struct sockaddr_storage xpt_remote; /* remote peer's address */ size_t xpt_remotelen; /* length of address */ + struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ }; int svc_reg_xprt_class(struct svc_xprt_class *); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 04dba23c59f..1b353a76c30 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -28,6 +28,7 @@ struct svc_sock { /* private TCP part */ u32 sk_reclen; /* length of record */ u32 sk_tcplen; /* current read length */ + struct rpc_xprt *sk_bc_xprt; /* NFSv4.1 backchannel xprt */ }; /* diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index c090df44257..228d694dbb9 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -179,6 +179,7 @@ struct rpc_xprt { spinlock_t reserve_lock; /* lock slot table */ u32 xid; /* Next XID value to use */ struct rpc_task * snd_task; /* Task blocked in send */ + struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ #if defined(CONFIG_NFS_V4_1) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index 13171e63f51..90c292e2738 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -43,5 +43,9 @@ static inline int rpc_reply_expected(struct rpc_task *task) (task->tk_msg.rpc_proc->p_decode != NULL); } +int svc_send_common(struct socket *sock, struct xdr_buf *xdr, + struct page *headpage, unsigned long headoffset, + struct page *tailpage, unsigned long tailoffset); + #endif /* _NET_SUNRPC_SUNRPC_H */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 912dea558cc..df124f78ee4 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -160,6 +160,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, mutex_init(&xprt->xpt_mutex); spin_lock_init(&xprt->xpt_lock); set_bit(XPT_BUSY, &xprt->xpt_flags); + rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); } EXPORT_SYMBOL_GPL(svc_xprt_init); @@ -810,6 +811,7 @@ int svc_send(struct svc_rqst *rqstp) else len = xprt->xpt_ops->xpo_sendto(rqstp); mutex_unlock(&xprt->xpt_mutex); + rpc_wake_up(&xprt->xpt_bc_pending); svc_xprt_release(rqstp); if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 76a380d37de..ccc5e83cae5 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -49,6 +49,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -153,49 +154,27 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) } /* - * Generic sendto routine + * send routine intended to be shared by the fore- and back-channel */ -static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) +int svc_send_common(struct socket *sock, struct xdr_buf *xdr, + struct page *headpage, unsigned long headoffset, + struct page *tailpage, unsigned long tailoffset) { - struct svc_sock *svsk = - container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); - struct socket *sock = svsk->sk_sock; - int slen; - union { - struct cmsghdr hdr; - long all[SVC_PKTINFO_SPACE / sizeof(long)]; - } buffer; - struct cmsghdr *cmh = &buffer.hdr; - int len = 0; int result; int size; struct page **ppage = xdr->pages; size_t base = xdr->page_base; unsigned int pglen = xdr->page_len; unsigned int flags = MSG_MORE; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); + int slen; + int len = 0; slen = xdr->len; - if (rqstp->rq_prot == IPPROTO_UDP) { - struct msghdr msg = { - .msg_name = &rqstp->rq_addr, - .msg_namelen = rqstp->rq_addrlen, - .msg_control = cmh, - .msg_controllen = sizeof(buffer), - .msg_flags = MSG_MORE, - }; - - svc_set_cmsg_data(rqstp, cmh); - - if (sock_sendmsg(sock, &msg, 0) < 0) - goto out; - } - /* send head */ if (slen == xdr->head[0].iov_len) flags = 0; - len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, + len = kernel_sendpage(sock, headpage, headoffset, xdr->head[0].iov_len, flags); if (len != xdr->head[0].iov_len) goto out; @@ -219,16 +198,58 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) base = 0; ppage++; } + /* send tail */ if (xdr->tail[0].iov_len) { - result = kernel_sendpage(sock, rqstp->rq_respages[0], - ((unsigned long)xdr->tail[0].iov_base) - & (PAGE_SIZE-1), - xdr->tail[0].iov_len, 0); - + result = kernel_sendpage(sock, tailpage, tailoffset, + xdr->tail[0].iov_len, 0); if (result > 0) len += result; } + +out: + return len; +} + + +/* + * Generic sendto routine + */ +static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) +{ + struct svc_sock *svsk = + container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); + struct socket *sock = svsk->sk_sock; + union { + struct cmsghdr hdr; + long all[SVC_PKTINFO_SPACE / sizeof(long)]; + } buffer; + struct cmsghdr *cmh = &buffer.hdr; + int len = 0; + unsigned long tailoff; + unsigned long headoff; + RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); + + if (rqstp->rq_prot == IPPROTO_UDP) { + struct msghdr msg = { + .msg_name = &rqstp->rq_addr, + .msg_namelen = rqstp->rq_addrlen, + .msg_control = cmh, + .msg_controllen = sizeof(buffer), + .msg_flags = MSG_MORE, + }; + + svc_set_cmsg_data(rqstp, cmh); + + if (sock_sendmsg(sock, &msg, 0) < 0) + goto out; + } + + tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1); + headoff = 0; + len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff, + rqstp->rq_respages[0], tailoff); + out: dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", svsk, xdr->head[0].iov_base, xdr->head[0].iov_len, @@ -951,6 +972,57 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) return -EAGAIN; } +static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, + struct rpc_rqst **reqpp, struct kvec *vec) +{ + struct rpc_rqst *req = NULL; + u32 *p; + u32 xid; + u32 calldir; + int len; + + len = svc_recvfrom(rqstp, vec, 1, 8); + if (len < 0) + goto error; + + p = (u32 *)rqstp->rq_arg.head[0].iov_base; + xid = *p++; + calldir = *p; + + if (calldir == 0) { + /* REQUEST is the most common case */ + vec[0] = rqstp->rq_arg.head[0]; + } else { + /* REPLY */ + if (svsk->sk_bc_xprt) + req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid); + + if (!req) { + printk(KERN_NOTICE + "%s: Got unrecognized reply: " + "calldir 0x%x sk_bc_xprt %p xid %08x\n", + __func__, ntohl(calldir), + svsk->sk_bc_xprt, xid); + vec[0] = rqstp->rq_arg.head[0]; + goto out; + } + + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, + sizeof(struct xdr_buf)); + /* copy the xid and call direction */ + memcpy(req->rq_private_buf.head[0].iov_base, + rqstp->rq_arg.head[0].iov_base, 8); + vec[0] = req->rq_private_buf.head[0]; + } + out: + vec[0].iov_base += 8; + vec[0].iov_len -= 8; + len = svsk->sk_reclen - 8; + error: + *reqpp = req; + return len; +} + /* * Receive data from a TCP socket. */ @@ -962,6 +1034,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) int len; struct kvec *vec; int pnum, vlen; + struct rpc_rqst *req = NULL; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), @@ -975,9 +1048,27 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) vec = rqstp->rq_vec; vec[0] = rqstp->rq_arg.head[0]; vlen = PAGE_SIZE; + + /* + * We have enough data for the whole tcp record. Let's try and read the + * first 8 bytes to get the xid and the call direction. We can use this + * to figure out if this is a call or a reply to a callback. If + * sk_reclen is < 8 (xid and calldir), then this is a malformed packet. + * In that case, don't bother with the calldir and just read the data. + * It will be rejected in svc_process. + */ + if (len >= 8) { + len = svc_process_calldir(svsk, rqstp, &req, vec); + if (len < 0) + goto err_again; + vlen -= 8; + } + pnum = 1; while (vlen < len) { - vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]); + vec[pnum].iov_base = (req) ? + page_address(req->rq_private_buf.pages[pnum - 1]) : + page_address(rqstp->rq_pages[pnum]); vec[pnum].iov_len = PAGE_SIZE; pnum++; vlen += PAGE_SIZE; @@ -989,6 +1080,16 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len < 0) goto err_again; + /* + * Account for the 8 bytes we read earlier + */ + len += 8; + + if (req) { + xprt_complete_rqst(req->rq_task, len); + len = 0; + goto out; + } dprintk("svc: TCP complete record (%d bytes)\n", len); rqstp->rq_arg.len = len; rqstp->rq_arg.page_base = 0; @@ -1002,6 +1103,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; +out: /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index f412a852bc7..fd46d42afa8 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -832,6 +832,11 @@ static void xprt_timer(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } +static inline int xprt_has_timer(struct rpc_xprt *xprt) +{ + return xprt->idle_timeout != 0; +} + /** * xprt_prepare_transmit - reserve the transport before sending a request * @task: RPC task about to send a request @@ -1013,7 +1018,7 @@ void xprt_release(struct rpc_task *task) if (!list_empty(&req->rq_list)) list_del(&req->rq_list); xprt->last_used = jiffies; - if (list_empty(&xprt->recv)) + if (list_empty(&xprt->recv) && xprt_has_timer(xprt)) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); @@ -1082,8 +1087,11 @@ found: #endif /* CONFIG_NFS_V4_1 */ INIT_WORK(&xprt->task_cleanup, xprt_autoclose); - setup_timer(&xprt->timer, xprt_init_autodisconnect, - (unsigned long)xprt); + if (xprt_has_timer(xprt)) + setup_timer(&xprt->timer, xprt_init_autodisconnect, + (unsigned long)xprt); + else + init_timer(&xprt->timer); xprt->last_used = jiffies; xprt->cwnd = RPC_INITCWND; xprt->bind_index = 0; @@ -1102,7 +1110,6 @@ found: dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); - return xprt; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 62438f3a914..d9a2b815714 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_NFS_V4_1 @@ -43,6 +44,7 @@ #include #include +#include "sunrpc.h" /* * xprtsock tunables */ @@ -2098,6 +2100,134 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) xprt->stat.bklog_u); } +/* + * Allocate a bunch of pages for a scratch buffer for the rpc code. The reason + * we allocate pages instead doing a kmalloc like rpc_malloc is because we want + * to use the server side send routines. + */ +void *bc_malloc(struct rpc_task *task, size_t size) +{ + struct page *page; + struct rpc_buffer *buf; + + BUG_ON(size > PAGE_SIZE - sizeof(struct rpc_buffer)); + page = alloc_page(GFP_KERNEL); + + if (!page) + return NULL; + + buf = page_address(page); + buf->len = PAGE_SIZE; + + return buf->data; +} + +/* + * Free the space allocated in the bc_alloc routine + */ +void bc_free(void *buffer) +{ + struct rpc_buffer *buf; + + if (!buffer) + return; + + buf = container_of(buffer, struct rpc_buffer, data); + free_page((unsigned long)buf); +} + +/* + * Use the svc_sock to send the callback. Must be called with svsk->sk_mutex + * held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request. + */ +static int bc_sendto(struct rpc_rqst *req) +{ + int len; + struct xdr_buf *xbufp = &req->rq_snd_buf; + struct rpc_xprt *xprt = req->rq_xprt; + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct socket *sock = transport->sock; + unsigned long headoff; + unsigned long tailoff; + + /* + * Set up the rpc header and record marker stuff + */ + xs_encode_tcp_record_marker(xbufp); + + tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; + headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; + len = svc_send_common(sock, xbufp, + virt_to_page(xbufp->head[0].iov_base), headoff, + xbufp->tail[0].iov_base, tailoff); + + if (len != xbufp->len) { + printk(KERN_NOTICE "Error sending entire callback!\n"); + len = -EAGAIN; + } + + return len; +} + +/* + * The send routine. Borrows from svc_send + */ +static int bc_send_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct svc_xprt *xprt; + struct svc_sock *svsk; + u32 len; + + dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid)); + /* + * Get the server socket associated with this callback xprt + */ + xprt = req->rq_xprt->bc_xprt; + svsk = container_of(xprt, struct svc_sock, sk_xprt); + + /* + * Grab the mutex to serialize data as the connection is shared + * with the fore channel + */ + if (!mutex_trylock(&xprt->xpt_mutex)) { + rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL); + if (!mutex_trylock(&xprt->xpt_mutex)) + return -EAGAIN; + rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task); + } + if (test_bit(XPT_DEAD, &xprt->xpt_flags)) + len = -ENOTCONN; + else + len = bc_sendto(req); + mutex_unlock(&xprt->xpt_mutex); + + if (len > 0) + len = 0; + + return len; +} + +/* + * The close routine. Since this is client initiated, we do nothing + */ + +static void bc_close(struct rpc_xprt *xprt) +{ + return; +} + +/* + * The xprt destroy routine. Again, because this connection is client + * initiated, we do nothing + */ + +static void bc_destroy(struct rpc_xprt *xprt) +{ + return; +} + static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, @@ -2134,6 +2264,22 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, }; +/* + * The rpc_xprt_ops for the server backchannel + */ + +static struct rpc_xprt_ops bc_tcp_ops = { + .reserve_xprt = xprt_reserve_xprt, + .release_xprt = xprt_release_xprt, + .buf_alloc = bc_malloc, + .buf_free = bc_free, + .send_request = bc_send_request, + .set_retrans_timeout = xprt_set_retrans_timeout_def, + .close = bc_close, + .destroy = bc_destroy, + .print_stats = xs_tcp_print_stats, +}; + static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, unsigned int slot_table_size) { -- cgit v1.2.3-70-g09d2 From f300baba5a1536070d6d77bf0c8c4ca999bb4f0f Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Thu, 10 Sep 2009 17:33:30 +0300 Subject: nfsd41: sunrpc: add new xprt class for nfsv4.1 backchannel [sunrpc: change idle timeout value for the backchannel] Signed-off-by: Alexandros Batsakis Signed-off-by: Benny Halevy Acked-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/xprt.h | 18 ++++++++ include/linux/sunrpc/xprtrdma.h | 5 --- include/linux/sunrpc/xprtsock.h | 11 ----- net/sunrpc/clnt.c | 1 + net/sunrpc/xprtsock.c | 96 ++++++++++++++++++++++++++++++++++++++++- 6 files changed, 114 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 3d025588e56..8ed9642a5a7 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -114,6 +114,7 @@ struct rpc_create_args { rpc_authflavor_t authflavor; unsigned long flags; char *client_name; + struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ }; /* Values for "flags" field */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 228d694dbb9..6f9457a75b8 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -124,6 +124,23 @@ struct rpc_xprt_ops { void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); }; +/* + * RPC transport identifiers + * + * To preserve compatibility with the historical use of raw IP protocol + * id's for transport selection, UDP and TCP identifiers are specified + * with the previous values. No such restriction exists for new transports, + * except that they may not collide with these values (17 and 6, + * respectively). + */ +#define XPRT_TRANSPORT_BC (1 << 31) +enum xprt_transports { + XPRT_TRANSPORT_UDP = IPPROTO_UDP, + XPRT_TRANSPORT_TCP = IPPROTO_TCP, + XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, + XPRT_TRANSPORT_RDMA = 256 +}; + struct rpc_xprt { struct kref kref; /* Reference count */ struct rpc_xprt_ops * ops; /* transport methods */ @@ -232,6 +249,7 @@ struct xprt_create { struct sockaddr * srcaddr; /* optional local address */ struct sockaddr * dstaddr; /* remote peer address */ size_t addrlen; + struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ }; struct xprt_class { diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 54a379c9e8e..c2f04e1ae15 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -40,11 +40,6 @@ #ifndef _LINUX_SUNRPC_XPRTRDMA_H #define _LINUX_SUNRPC_XPRTRDMA_H -/* - * RPC transport identifier for RDMA - */ -#define XPRT_TRANSPORT_RDMA 256 - /* * rpcbind (v3+) RDMA netid. */ diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index c2a46c45c8f..3f14a02e9cc 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -12,17 +12,6 @@ int init_socket_xprt(void); void cleanup_socket_xprt(void); -/* - * RPC transport identifiers for UDP, TCP - * - * To preserve compatibility with the historical use of raw IP protocol - * id's for transport selection, these are specified with the previous - * values. No such restriction exists for new transports, except that - * they may not collide with these values (17 and 6, respectively). - */ -#define XPRT_TRANSPORT_UDP IPPROTO_UDP -#define XPRT_TRANSPORT_TCP IPPROTO_TCP - /* * RPC slot table sizes for UDP, TCP transports */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c1e467e1b07..7389804e3bb 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -288,6 +288,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) .srcaddr = args->saddress, .dstaddr = args->address, .addrlen = args->addrsize, + .bc_xprt = args->bc_xprt, }; char servername[48]; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d9a2b815714..bee41546575 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2468,11 +2468,93 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) return ERR_PTR(-EINVAL); } +/** + * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket + * @args: rpc transport creation arguments + * + */ +static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) +{ + struct sockaddr *addr = args->dstaddr; + struct rpc_xprt *xprt; + struct sock_xprt *transport; + struct svc_sock *bc_sock; + + if (!args->bc_xprt) + ERR_PTR(-EINVAL); + + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + if (IS_ERR(xprt)) + return xprt; + transport = container_of(xprt, struct sock_xprt, xprt); + + xprt->prot = IPPROTO_TCP; + xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; + xprt->timeout = &xs_tcp_default_timeout; + + /* backchannel */ + xprt_set_bound(xprt); + xprt->bind_timeout = 0; + xprt->connect_timeout = 0; + xprt->reestablish_timeout = 0; + xprt->idle_timeout = 0; + + /* + * The backchannel uses the same socket connection as the + * forechannel + */ + xprt->bc_xprt = args->bc_xprt; + bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); + bc_sock->sk_bc_xprt = xprt; + transport->sock = bc_sock->sk_sock; + transport->inet = bc_sock->sk_sk; + + xprt->ops = &bc_tcp_ops; + + switch (addr->sa_family) { + case AF_INET: + xs_format_peer_addresses(xprt, "tcp", + RPCBIND_NETID_TCP); + break; + case AF_INET6: + xs_format_peer_addresses(xprt, "tcp", + RPCBIND_NETID_TCP6); + break; + default: + kfree(xprt); + return ERR_PTR(-EAFNOSUPPORT); + } + + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); + + /* + * Since we don't want connections for the backchannel, we set + * the xprt status to connected + */ + xprt_set_connected(xprt); + + + if (try_module_get(THIS_MODULE)) + return xprt; + kfree(xprt->slot); + kfree(xprt); + return ERR_PTR(-EINVAL); +} + static struct xprt_class xs_udp_transport = { .list = LIST_HEAD_INIT(xs_udp_transport.list), .name = "udp", .owner = THIS_MODULE, - .ident = IPPROTO_UDP, + .ident = XPRT_TRANSPORT_UDP, .setup = xs_setup_udp, }; @@ -2480,10 +2562,18 @@ static struct xprt_class xs_tcp_transport = { .list = LIST_HEAD_INIT(xs_tcp_transport.list), .name = "tcp", .owner = THIS_MODULE, - .ident = IPPROTO_TCP, + .ident = XPRT_TRANSPORT_TCP, .setup = xs_setup_tcp, }; +static struct xprt_class xs_bc_tcp_transport = { + .list = LIST_HEAD_INIT(xs_bc_tcp_transport.list), + .name = "tcp NFSv4.1 backchannel", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_BC_TCP, + .setup = xs_setup_bc_tcp, +}; + /** * init_socket_xprt - set up xprtsock's sysctls, register with RPC client * @@ -2497,6 +2587,7 @@ int init_socket_xprt(void) xprt_register_transport(&xs_udp_transport); xprt_register_transport(&xs_tcp_transport); + xprt_register_transport(&xs_bc_tcp_transport); return 0; } @@ -2516,6 +2607,7 @@ void cleanup_socket_xprt(void) xprt_unregister_transport(&xs_udp_transport); xprt_unregister_transport(&xs_tcp_transport); + xprt_unregister_transport(&xs_bc_tcp_transport); } static int param_set_uint_minmax(const char *val, struct kernel_param *kp, -- cgit v1.2.3-70-g09d2 From 5d351754fcf58d1a604aa7cf95c2805e8a098ad9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Sep 2009 13:32:13 -0400 Subject: SUNRPC: Defer the auth_gss upcall when the RPC call is asynchronous Otherwise, the upcall is going to be synchronous, which may not be what the caller wants... Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/auth.h | 4 ++-- net/sunrpc/auth.c | 20 ++++++++++++-------- net/sunrpc/auth_generic.c | 4 ++-- 3 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 3f632182d8e..996df4dac7d 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -111,7 +111,7 @@ struct rpc_credops { void (*crdestroy)(struct rpc_cred *); int (*crmatch)(struct auth_cred *, struct rpc_cred *, int); - void (*crbind)(struct rpc_task *, struct rpc_cred *); + void (*crbind)(struct rpc_task *, struct rpc_cred *, int); __be32 * (*crmarshal)(struct rpc_task *, __be32 *); int (*crrefresh)(struct rpc_task *); __be32 * (*crvalidate)(struct rpc_task *, __be32 *); @@ -140,7 +140,7 @@ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred * void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); void rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int); -void rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *); +void rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int); void put_rpccred(struct rpc_cred *); void rpcauth_unbindcred(struct rpc_task *); __be32 * rpcauth_marshcred(struct rpc_task *, __be32 *); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 0c431c277af..54a4e042f10 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -385,7 +385,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, EXPORT_SYMBOL_GPL(rpcauth_init_cred); void -rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) +rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) { task->tk_msg.rpc_cred = get_rpccred(cred); dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, @@ -394,7 +394,7 @@ rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); static void -rpcauth_bind_root_cred(struct rpc_task *task) +rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred acred = { @@ -405,7 +405,7 @@ rpcauth_bind_root_cred(struct rpc_task *task) dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); - ret = auth->au_ops->lookup_cred(auth, &acred, 0); + ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); if (!IS_ERR(ret)) task->tk_msg.rpc_cred = ret; else @@ -413,14 +413,14 @@ rpcauth_bind_root_cred(struct rpc_task *task) } static void -rpcauth_bind_new_cred(struct rpc_task *task) +rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; struct rpc_cred *ret; dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, auth->au_ops->au_name); - ret = rpcauth_lookupcred(auth, 0); + ret = rpcauth_lookupcred(auth, lookupflags); if (!IS_ERR(ret)) task->tk_msg.rpc_cred = ret; else @@ -430,12 +430,16 @@ rpcauth_bind_new_cred(struct rpc_task *task) void rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) { + int lookupflags = 0; + + if (flags & RPC_TASK_ASYNC) + lookupflags |= RPCAUTH_LOOKUP_NEW; if (cred != NULL) - cred->cr_ops->crbind(task, cred); + cred->cr_ops->crbind(task, cred, lookupflags); else if (flags & RPC_TASK_ROOTCREDS) - rpcauth_bind_root_cred(task); + rpcauth_bind_root_cred(task, lookupflags); else - rpcauth_bind_new_cred(task); + rpcauth_bind_new_cred(task, lookupflags); } void diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 4028502f052..bf88bf8e936 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -55,13 +55,13 @@ struct rpc_cred *rpc_lookup_machine_cred(void) EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); static void -generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) +generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; struct rpc_cred *ret; - ret = auth->au_ops->lookup_cred(auth, acred, 0); + ret = auth->au_ops->lookup_cred(auth, acred, lookupflags); if (!IS_ERR(ret)) task->tk_msg.rpc_cred = ret; else -- cgit v1.2.3-70-g09d2 From 29ab23cc5d351658d01a4327d55e9106a73fd04f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 15 Sep 2009 15:56:50 -0400 Subject: nfsd4: allow nfs4 state startup to fail The failure here is pretty unlikely, but we should handle it anyway. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 17 ++++++++++++----- fs/nfsd/nfssvc.c | 4 +++- include/linux/nfsd/nfsd.h | 4 ++-- 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 46e9ac52687..11db40cb2f2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4004,7 +4004,7 @@ set_max_delegations(void) /* initialization to perform when the nfsd service is started: */ -static void +static int __nfs4_state_start(void) { unsigned long grace_time; @@ -4016,19 +4016,26 @@ __nfs4_state_start(void) printk(KERN_INFO "NFSD: starting %ld-second grace period\n", grace_time/HZ); laundry_wq = create_singlethread_workqueue("nfsd4"); + if (laundry_wq == NULL) + return -ENOMEM; queue_delayed_work(laundry_wq, &laundromat_work, grace_time); set_max_delegations(); + return 0; } -void +int nfs4_state_start(void) { + int ret; + if (nfs4_init) - return; + return 0; nfsd4_load_reboot_recovery_data(); - __nfs4_state_start(); + ret = __nfs4_state_start(); + if (ret) + return ret; nfs4_init = 1; - return; + return 0; } time_t diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 4472449c093..fcc00108826 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -411,7 +411,9 @@ nfsd_svc(unsigned short port, int nrservs) error = nfsd_racache_init(2*nrservs); if (error<0) goto out; - nfs4_state_start(); + error = nfs4_state_start(); + if (error) + goto out; nfsd_reset_versions(); diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 2812ed52669..24fdf89cea8 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -166,7 +166,7 @@ extern int nfsd_max_blksize; extern unsigned int max_delegations; int nfs4_state_init(void); void nfsd4_free_slabs(void); -void nfs4_state_start(void); +int nfs4_state_start(void); void nfs4_state_shutdown(void); time_t nfs4_lease_time(void); void nfs4_reset_lease(time_t leasetime); @@ -174,7 +174,7 @@ int nfs4_reset_recoverydir(char *recdir); #else static inline int nfs4_state_init(void) { return 0; } static inline void nfsd4_free_slabs(void) { } -static inline void nfs4_state_start(void) { } +static inline int nfs4_state_start(void) { } static inline void nfs4_state_shutdown(void) { } static inline time_t nfs4_lease_time(void) { return 0; } static inline void nfs4_reset_lease(time_t leasetime) { } -- cgit v1.2.3-70-g09d2 From 80fc015bdfe1f5b870c1e1ee02d78e709523fee7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 15 Sep 2009 18:07:35 -0400 Subject: nfsd4: use common rpc_cred for all callbacks Callbacks are always made using the machine's identity, so we can use a single auth_generic credential shared among callbacks to all clients and let the rpc code take care of the rest. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 33 ++++++++++----------------------- fs/nfsd/nfs4state.c | 6 +----- include/linux/nfsd/state.h | 2 +- 3 files changed, 12 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 4abb88264c7..128519769ea 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -432,42 +432,29 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { .rpc_call_done = nfsd4_cb_probe_done, }; -static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb) +static struct rpc_cred *callback_cred; + +int set_callback_cred(void) { - struct auth_cred acred = { - .machine_cred = 1 - }; - struct rpc_auth *auth = cb->cb_client->cl_auth; - - /* - * Note in the gss case this doesn't actually have to wait for a - * gss upcall (or any calls to the client); this just creates a - * non-uptodate cred which the rpc state machine will fill in with - * a refresh_upcall later. - */ - return auth->au_ops->lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW); + callback_cred = rpc_lookup_machine_cred(); + if (!callback_cred) + return -ENOMEM; + return 0; } + void do_probe_callback(struct nfs4_client *clp) { struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], .rpc_argp = clp, + .rpc_cred = callback_cred }; - struct rpc_cred *cred; int status; - cred = lookup_cb_cred(cb); - if (IS_ERR(cred)) { - status = PTR_ERR(cred); - goto out; - } - cb->cb_cred = cred; - msg.rpc_cred = cb->cb_cred; status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT, &nfsd4_cb_probe_ops, (void *)clp); -out: if (status) { warn_no_callback_path(clp, status); put_nfs4_client(clp); @@ -550,7 +537,7 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], .rpc_argp = dp, - .rpc_cred = clp->cl_cb_conn.cb_cred + .rpc_cred = callback_cred }; int status; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 11db40cb2f2..0445192d660 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -696,10 +696,6 @@ shutdown_callback_client(struct nfs4_client *clp) clp->cl_cb_conn.cb_client = NULL; rpc_shutdown_client(clnt); } - if (clp->cl_cb_conn.cb_cred) { - put_rpccred(clp->cl_cb_conn.cb_cred); - clp->cl_cb_conn.cb_cred = NULL; - } } static inline void @@ -4020,7 +4016,7 @@ __nfs4_state_start(void) return -ENOMEM; queue_delayed_work(laundry_wq, &laundromat_work, grace_time); set_max_delegations(); - return 0; + return set_callback_cred(); } int diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 70ef5f4abbb..9bf3aa8c5ae 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -89,7 +89,6 @@ struct nfs4_cb_conn { /* RPC client info */ atomic_t cb_set; /* successful CB_NULL call */ struct rpc_clnt * cb_client; - struct rpc_cred * cb_cred; }; /* Maximum number of slots per session. 160 is useful for long haul TCP */ @@ -362,6 +361,7 @@ extern int nfs4_in_grace(void); extern __be32 nfs4_check_open_reclaim(clientid_t *clid); extern void put_nfs4_client(struct nfs4_client *clp); extern void nfs4_free_stateowner(struct kref *kref); +extern int set_callback_cred(void); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_cb_recall(struct nfs4_delegation *dp); extern void nfs4_put_delegation(struct nfs4_delegation *dp); -- cgit v1.2.3-70-g09d2 From 38524ab38f2752beee262a97403d871665838172 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 10 Sep 2009 12:25:59 +0300 Subject: nfsd41: Backchannel: callback infrastructure Keep the xprt used for create_session in cl_cb_xprt. Mark cl_callback.cb_minorversion = 1 and remember the client provided cl_callback.cb_prog rpc program number. Use it to probe the callback path. Use the client's network address to initialize as the callback's address as expected by the xprt creation routines. Define xdr sizes and code nfs4_cb_compound header to be able to send a null callback rpc. Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Ricardo Labiaga [get callback minorversion from fore channel's] Signed-off-by: Benny Halevy [nfsd41: change bc_sock to bc_xprt] Signed-off-by: Benny Halevy [pulled definition for cl_cb_xprt] Signed-off-by: Benny Halevy [nfsd41: set up backchannel's cb_addr] [moved rpc_create_args init to "nfsd: modify nfsd4.1 backchannel to use new xprt class"] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 17 +++++++++++++++-- fs/nfsd/nfs4state.c | 14 ++++++++++++++ include/linux/nfsd/state.h | 3 +++ 3 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 128519769ea..db4188ce9b0 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -52,16 +53,19 @@ #define NFSPROC4_CB_NULL 0 #define NFSPROC4_CB_COMPOUND 1 +#define NFS4_STATEID_SIZE 16 /* Index of predefined Linux callback client operations */ enum { NFSPROC4_CLNT_CB_NULL = 0, NFSPROC4_CLNT_CB_RECALL, + NFSPROC4_CLNT_CB_SEQUENCE, }; enum nfs_cb_opnum4 { OP_CB_RECALL = 4, + OP_CB_SEQUENCE = 11, }; #define NFS4_MAXTAGLEN 20 @@ -70,15 +74,22 @@ enum nfs_cb_opnum4 { #define NFS4_dec_cb_null_sz 0 #define cb_compound_enc_hdr_sz 4 #define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) +#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) +#define cb_sequence_enc_sz (sessionid_sz + 4 + \ + 1 /* no referring calls list yet */) +#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) + #define op_enc_sz 1 #define op_dec_sz 2 #define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) #define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) #define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ 1 + enc_stateid_sz + \ enc_nfs4_fh_sz) #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ op_dec_sz) /* @@ -137,11 +148,13 @@ xdr_error: \ } while (0) struct nfs4_cb_compound_hdr { - int status; - u32 ident; + /* args */ + u32 ident; /* minorversion 0 only */ u32 nops; __be32 *nops_p; u32 minorversion; + /* res */ + int status; u32 taglen; char *tag; }; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0445192d660..d8196b453f6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -702,6 +702,8 @@ static inline void free_client(struct nfs4_client *clp) { shutdown_callback_client(clp); + if (clp->cl_cb_xprt) + svc_xprt_put(clp->cl_cb_xprt); if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); kfree(clp->cl_principal); @@ -1317,6 +1319,18 @@ nfsd4_create_session(struct svc_rqst *rqstp, cr_ses->flags &= ~SESSION4_PERSIST; cr_ses->flags &= ~SESSION4_RDMA; + if (cr_ses->flags & SESSION4_BACK_CHAN) { + unconf->cl_cb_xprt = rqstp->rq_xprt; + svc_xprt_get(unconf->cl_cb_xprt); + rpc_copy_addr( + (struct sockaddr *)&unconf->cl_cb_conn.cb_addr, + sa); + unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa); + unconf->cl_cb_conn.cb_minorversion = + cstate->minorversion; + unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog; + nfsd4_probe_callback(unconf); + } conf = unconf; } else { status = nfserr_stale_clientid; diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 9bf3aa8c5ae..c916032570c 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -211,6 +211,9 @@ struct nfs4_client { struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ u32 cl_exchange_flags; struct nfs4_sessionid cl_sessionid; + + /* for nfs41 callbacks */ + struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */ }; /* struct nfs4_client_reset -- cgit v1.2.3-70-g09d2 From 132f97715c098393fb8de3c26b07b9fdbd2334f1 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Thu, 10 Sep 2009 12:26:12 +0300 Subject: nfsd41: Backchannel: Add sequence arguments to callback RPC arguments Follow the model we use in the client. Make the sequence arguments part of the regular RPC arguments. None of the callbacks that are soon to be implemented expect results that need to be passed back to the caller, so we don't define a separate RPC results structure. For session validation, the cb_sequence decoding will use a pointer to the sequence arguments that are part of the RPC argument. Signed-off-by: Ricardo Labiaga [define struct nfsd4_cb_sequence here] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 5 +++++ include/linux/nfsd/state.h | 6 ++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index db4188ce9b0..f31175717c1 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -92,6 +92,11 @@ enum nfs_cb_opnum4 { cb_sequence_dec_sz + \ op_dec_sz) +struct nfs4_rpc_args { + void *args_op; + struct nfsd4_cb_sequence args_seq; +}; + /* * Generic encode routines from fs/nfs/nfs4xdr.c */ diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index c916032570c..0e5b5aecde0 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -60,6 +60,12 @@ typedef struct { #define si_stateownerid si_opaque.so_stateownerid #define si_fileid si_opaque.so_fileid +struct nfsd4_cb_sequence { + /* args/res */ + u32 cbs_minorversion; + struct nfs4_client *cbs_clp; +}; + struct nfs4_delegation { struct list_head dl_perfile; struct list_head dl_perclnt; -- cgit v1.2.3-70-g09d2 From 199ff35e1c8724871e157c2e48556c2794946e82 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Thu, 10 Sep 2009 12:26:25 +0300 Subject: nfsd41: Backchannel: Server backchannel RPC wait queue RPC callback requests will wait on this wait queue if the backchannel is out of slots. Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 ++ include/linux/nfsd/state.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d8196b453f6..f4cebd9016b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -775,6 +775,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir) INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_sessions); INIT_LIST_HEAD(&clp->cl_lru); + clear_bit(0, &clp->cl_cb_slot_busy); + rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 0e5b5aecde0..9cc40a137c3 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -219,7 +219,11 @@ struct nfs4_client { struct nfs4_sessionid cl_sessionid; /* for nfs41 callbacks */ + /* We currently support a single back channel with a single slot */ + unsigned long cl_cb_slot_busy; struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */ + struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ + /* wait here for slots */ }; /* struct nfs4_client_reset -- cgit v1.2.3-70-g09d2 From 2a1d1b593803d7c18a369bf148f3b48c5a3260fc Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Thu, 10 Sep 2009 12:26:38 +0300 Subject: nfsd41: Backchannel: Setup sequence information Follows the model used by the NFS client. Setup the RPC prepare and done function pointers so that we can populate the sequence information if minorversion == 1. rpc_run_task() is then invoked directly just like existing NFS client operations do. nfsd4_cb_prepare() determines if the sequence information needs to be setup. If the slot is in use, it adds itself to the wait queue. nfsd4_cb_done() wakes anyone sleeping on the callback channel wait queue after our RPC reply has been received. It also sets the task message result pointer to NULL to clearly indicate we're done using it. Signed-off-by: Ricardo Labiaga [define and initialize cl_cb_seq_nr here] [pulled out unused defintion of nfsd4_cb_done] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfs4state.c | 1 + include/linux/nfsd/state.h | 1 + 3 files changed, 64 insertions(+) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index f31175717c1..25a09069e45 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -501,6 +501,67 @@ nfsd4_probe_callback(struct nfs4_client *clp) do_probe_callback(clp); } +/* + * There's currently a single callback channel slot. + * If the slot is available, then mark it busy. Otherwise, set the + * thread for sleeping on the callback RPC wait queue. + */ +static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, + struct rpc_task *task) +{ + struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; + u32 *ptr = (u32 *)clp->cl_sessionid.data; + int status = 0; + + dprintk("%s: %u:%u:%u:%u\n", __func__, + ptr[0], ptr[1], ptr[2], ptr[3]); + + if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { + rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); + dprintk("%s slot is busy\n", __func__); + status = -EAGAIN; + goto out; + } + + /* + * We'll need the clp during XDR encoding and decoding, + * and the sequence during decoding to verify the reply + */ + args->args_seq.cbs_clp = clp; + task->tk_msg.rpc_resp = &args->args_seq; + +out: + dprintk("%s status=%d\n", __func__, status); + return status; +} + +/* + * TODO: cb_sequence should support referring call lists, cachethis, multiple + * slots, and mark callback channel down on communication errors. + */ +static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_delegation *dp = calldata; + struct nfs4_client *clp = dp->dl_client; + struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; + u32 minorversion = clp->cl_cb_conn.cb_minorversion; + int status = 0; + + args->args_seq.cbs_minorversion = minorversion; + if (minorversion) { + status = nfsd41_cb_setup_sequence(clp, task); + if (status) { + if (status != -EAGAIN) { + /* terminate rpc task */ + task->tk_status = status; + task->tk_action = NULL; + } + return; + } + } + rpc_call_start(task); +} + static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) { struct nfs4_delegation *dp = calldata; @@ -540,6 +601,7 @@ static void nfsd4_cb_recall_release(void *calldata) } static const struct rpc_call_ops nfsd4_cb_recall_ops = { + .rpc_call_prepare = nfsd4_cb_prepare, .rpc_call_done = nfsd4_cb_recall_done, .rpc_release = nfsd4_cb_recall_release, }; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f4cebd9016b..76b7bcbb3f2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1331,6 +1331,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, unconf->cl_cb_conn.cb_minorversion = cstate->minorversion; unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog; + unconf->cl_cb_seq_nr = 1; nfsd4_probe_callback(unconf); } conf = unconf; diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 9cc40a137c3..b38d1132418 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -221,6 +221,7 @@ struct nfs4_client { /* for nfs41 callbacks */ /* We currently support a single back channel with a single slot */ unsigned long cl_cb_slot_busy; + u32 cl_cb_seq_nr; struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */ struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ /* wait here for slots */ -- cgit v1.2.3-70-g09d2 From c0826574ddc0df486ecfc2d655e08904c6513209 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 17 Sep 2009 17:03:06 +1000 Subject: nfsd: return success for non-NFS4 nfs4_state_start Today's linux-next build (sparc64_defconfig) failed like this: In file included from arch/sparc/kernel/sys_sparc32.c:32: include/linux/nfsd/nfsd.h: In function 'nfs4_state_start': include/linux/nfsd/nfsd.h:177: error: no return statement in function returning non-void Caused by commit 29ab23cc5d351658d01a4327d55e9106a73fd04f ("nfsd4: allow nfs4 state startup to fail"). Please, if you add code that depends on a CONFIG option, build with that option enabled and disabled. Signed-off-by: Stephen Rothwell Signed-off-by: J. Bruce Fields --- include/linux/nfsd/nfsd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 24fdf89cea8..03bbe903910 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -174,7 +174,7 @@ int nfs4_reset_recoverydir(char *recdir); #else static inline int nfs4_state_init(void) { return 0; } static inline void nfsd4_free_slabs(void) { } -static inline int nfs4_state_start(void) { } +static inline int nfs4_state_start(void) { return 0; } static inline void nfs4_state_shutdown(void) { } static inline time_t nfs4_lease_time(void) { return 0; } static inline void nfs4_reset_lease(time_t leasetime) { } -- cgit v1.2.3-70-g09d2