diff options
Diffstat (limited to 'fs/nfs')
41 files changed, 9628 insertions, 4027 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index f7e13db613c..ba306658a6d 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -76,13 +76,17 @@ config NFS_V4 config NFS_V4_1 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" - depends on NFS_V4 && EXPERIMENTAL + depends on NFS_FS && NFS_V4 && EXPERIMENTAL + select PNFS_FILE_LAYOUT help This option enables support for minor version 1 of the NFSv4 protocol - (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. + (RFC 5661) in the kernel's NFS client. If unsure, say N. +config PNFS_FILE_LAYOUT + tristate + config ROOT_NFS bool "Root file system on NFS" depends on NFS_FS=y && IP_PNP @@ -117,3 +121,14 @@ config NFS_USE_KERNEL_DNS select DNS_RESOLVER select KEYS default y + +config NFS_USE_NEW_IDMAPPER + bool "Use the new idmapper upcall routine" + depends on NFS_V4 && KEYS + help + Say Y here if you want NFS to use the new idmapper upcall functions. + You will need /sbin/request-key (usually provided by the keyutils + package). For details, read + <file:Documentation/filesystems/nfs/idmapper.txt>. + + If you are unsure, say N. diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index da7fda639ea..4776ff9e381 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -15,5 +15,9 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o +nfs-$(CONFIG_NFS_V4_1) += pnfs.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o + +obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o +nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index e17b49e2eab..e3d29426905 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -9,7 +9,6 @@ #include <linux/completion.h> #include <linux/ip.h> #include <linux/module.h> -#include <linux/smp_lock.h> #include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcsock.h> #include <linux/nfs_fs.h> @@ -17,9 +16,7 @@ #include <linux/freezer.h> #include <linux/kthread.h> #include <linux/sunrpc/svcauth_gss.h> -#if defined(CONFIG_NFS_V4_1) #include <linux/sunrpc/bc_xprt.h> -#endif #include <net/inet_sock.h> @@ -109,7 +106,7 @@ nfs4_callback_up(struct svc_serv *serv) { int ret; - ret = svc_create_xprt(serv, "tcp", PF_INET, + ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret <= 0) goto out_err; @@ -117,7 +114,7 @@ nfs4_callback_up(struct svc_serv *serv) dprintk("NFS: Callback listener port = %u (af %u)\n", nfs_callback_tcpport, PF_INET); - ret = svc_create_xprt(serv, "tcp", PF_INET6, + ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { nfs_callback_tcpport6 = ret; @@ -178,30 +175,38 @@ nfs41_callback_svc(void *vrqstp) struct svc_rqst * nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) { - struct svc_xprt *bc_xprt; - struct svc_rqst *rqstp = ERR_PTR(-ENOMEM); + struct svc_rqst *rqstp; + int ret; - dprintk("--> %s\n", __func__); - /* Create a svc_sock for the service */ - bc_xprt = svc_sock_create(serv, xprt->prot); - if (!bc_xprt) + /* + * Create an svc_sock for the back channel service that shares the + * fore channel connection. + * Returns the input port (0) and sets the svc_serv bc_xprt on success + */ + ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0, + SVC_SOCK_ANONYMOUS); + if (ret < 0) { + rqstp = ERR_PTR(ret); goto out; + } /* * Save the svc_serv in the transport so that it can * be referenced when the session backchannel is initialized */ - serv->bc_xprt = bc_xprt; xprt->bc_serv = serv; INIT_LIST_HEAD(&serv->sv_cb_list); spin_lock_init(&serv->sv_cb_lock); init_waitqueue_head(&serv->sv_cb_waitq); rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); - if (IS_ERR(rqstp)) - svc_sock_destroy(bc_xprt); + if (IS_ERR(rqstp)) { + svc_xprt_put(serv->sv_bc_xprt); + serv->sv_bc_xprt = NULL; + } out: - dprintk("--> %s return %p\n", __func__, rqstp); + dprintk("--> %s return %ld\n", __func__, + IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0); return rqstp; } @@ -323,58 +328,58 @@ void nfs_callback_down(int minorversion) mutex_unlock(&nfs_callback_mutex); } -static int check_gss_callback_principal(struct nfs_client *clp, - struct svc_rqst *rqstp) +/* Boolean check of RPC_AUTH_GSS principal */ +int +check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) { struct rpc_clnt *r = clp->cl_rpcclient; char *p = svc_gss_principal(rqstp); + if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) + return 1; + + /* No RPC_AUTH_GSS on NFSv4.1 back channel yet */ + if (clp->cl_minorversion != 0) + return 0; /* * It might just be a normal user principal, in which case * userspace won't bother to tell us the name at all. */ if (p == NULL) - return SVC_DENIED; + return 0; /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ if (memcmp(p, "nfs@", 4) != 0) - return SVC_DENIED; + return 0; p += 4; if (strcmp(p, r->cl_server) != 0) - return SVC_DENIED; - return SVC_OK; + return 0; + return 1; } +/* + * pg_authenticate method for nfsv4 callback threads. + * + * The authflavor has been negotiated, so an incorrect flavor is a server + * bug. Drop packets with incorrect authflavor. + * + * All other checking done after NFS decoding where the nfs_client can be + * found in nfs4_callback_compound + */ static int nfs_callback_authenticate(struct svc_rqst *rqstp) { - struct nfs_client *clp; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - int ret = SVC_OK; - - /* Don't talk to strangers */ - clp = nfs_find_client(svc_addr(rqstp), 4); - if (clp == NULL) - return SVC_DROP; - - dprintk("%s: %s NFSv4 callback!\n", __func__, - svc_print_addr(rqstp, buf, sizeof(buf))); - switch (rqstp->rq_authop->flavour) { - case RPC_AUTH_NULL: - if (rqstp->rq_proc != CB_NULL) - ret = SVC_DENIED; - break; - case RPC_AUTH_UNIX: - break; - case RPC_AUTH_GSS: - ret = check_gss_callback_principal(clp, rqstp); - break; - default: - ret = SVC_DENIED; + case RPC_AUTH_NULL: + if (rqstp->rq_proc != CB_NULL) + return SVC_DROP; + break; + case RPC_AUTH_GSS: + /* No RPC_AUTH_GSS support yet in NFSv4.1 */ + if (svc_is_backchannel(rqstp)) + return SVC_DROP; } - nfs_put_client(clp); - return ret; + return SVC_OK; } /* diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 85a7cfd1b8d..46d93ce7311 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -7,6 +7,7 @@ */ #ifndef __LINUX_FS_NFS_CALLBACK_H #define __LINUX_FS_NFS_CALLBACK_H +#include <linux/sunrpc/svc.h> #define NFS4_CALLBACK 0x40000000 #define NFS4_CALLBACK_XDRSIZE 2048 @@ -34,10 +35,16 @@ enum nfs4_callback_opnum { OP_CB_ILLEGAL = 10044, }; +struct cb_process_state { + __be32 drc_status; + struct nfs_client *clp; +}; + struct cb_compound_hdr_arg { unsigned int taglen; const char *tag; unsigned int minorversion; + unsigned int cb_ident; /* v4.0 callback identifier */ unsigned nops; }; @@ -103,14 +110,23 @@ struct cb_sequenceres { uint32_t csr_target_highestslotid; }; -extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, - struct cb_sequenceres *res); +extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, + struct cb_sequenceres *res, + struct cb_process_state *cps); extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid); #define RCA4_TYPE_MASK_RDATA_DLG 0 #define RCA4_TYPE_MASK_WDATA_DLG 1 +#define RCA4_TYPE_MASK_DIR_DLG 2 +#define RCA4_TYPE_MASK_FILE_LAYOUT 3 +#define RCA4_TYPE_MASK_BLK_LAYOUT 4 +#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN 8 +#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX 9 +#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12 +#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15 +#define RCA4_TYPE_MASK_ALL 0xf31f struct cb_recallanyargs { struct sockaddr *craa_addr; @@ -118,25 +134,52 @@ struct cb_recallanyargs { uint32_t craa_type_mask; }; -extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy); +extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, + void *dummy, + struct cb_process_state *cps); struct cb_recallslotargs { struct sockaddr *crsa_addr; uint32_t crsa_target_max_slots; }; -extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args, - void *dummy); - -#endif /* CONFIG_NFS_V4_1 */ +extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, + void *dummy, + struct cb_process_state *cps); + +struct cb_layoutrecallargs { + struct sockaddr *cbl_addr; + uint32_t cbl_recall_type; + uint32_t cbl_layout_type; + uint32_t cbl_layoutchanged; + union { + struct { + struct nfs_fh cbl_fh; + struct pnfs_layout_range cbl_range; + nfs4_stateid cbl_stateid; + }; + struct nfs_fsid cbl_fsid; + }; +}; -extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); -extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy); +extern unsigned nfs4_callback_layoutrecall( + struct cb_layoutrecallargs *args, + void *dummy, struct cb_process_state *cps); +extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); +extern void nfs4_cb_take_slot(struct nfs_client *clp); +#endif /* CONFIG_NFS_V4_1 */ +extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *); +extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, + struct cb_getattrres *res, + struct cb_process_state *cps); +extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, + struct cb_process_state *cps); #ifdef CONFIG_NFS_V4 extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); extern void nfs_callback_down(int minorversion); extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid); +extern int nfs4_set_callback_sessionid(struct nfs_client *clp); #endif /* CONFIG_NFS_V4 */ /* * nfs41: Callbacks are expected to not cause substantial latency, diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 930d10fecda..89587573fe5 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -12,30 +12,33 @@ #include "callback.h" #include "delegation.h" #include "internal.h" +#include "pnfs.h" #ifdef NFS_DEBUG #define NFSDBG_FACILITY NFSDBG_CALLBACK #endif - -__be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) + +__be32 nfs4_callback_getattr(struct cb_getattrargs *args, + struct cb_getattrres *res, + struct cb_process_state *cps) { - struct nfs_client *clp; struct nfs_delegation *delegation; struct nfs_inode *nfsi; struct inode *inode; + res->status = htonl(NFS4ERR_OP_NOT_IN_SESSION); + if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */ + goto out; + res->bitmap[0] = res->bitmap[1] = 0; res->status = htonl(NFS4ERR_BADHANDLE); - clp = nfs_find_client(args->addr, 4); - if (clp == NULL) - goto out; dprintk("NFS: GETATTR callback request from %s\n", - rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); - inode = nfs_delegation_find_inode(clp, &args->fh); + inode = nfs_delegation_find_inode(cps->clp, &args->fh); if (inode == NULL) - goto out_putclient; + goto out; nfsi = NFS_I(inode); rcu_read_lock(); delegation = rcu_dereference(nfsi->delegation); @@ -55,49 +58,41 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres * out_iput: rcu_read_unlock(); iput(inode); -out_putclient: - nfs_put_client(clp); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status)); return res->status; } -__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) +__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, + struct cb_process_state *cps) { - struct nfs_client *clp; struct inode *inode; __be32 res; - res = htonl(NFS4ERR_BADHANDLE); - clp = nfs_find_client(args->addr, 4); - if (clp == NULL) + res = htonl(NFS4ERR_OP_NOT_IN_SESSION); + if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */ goto out; dprintk("NFS: RECALL callback request from %s\n", - rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); - - do { - struct nfs_client *prev = clp; - - inode = nfs_delegation_find_inode(clp, &args->fh); - if (inode != NULL) { - /* Set up a helper thread to actually return the delegation */ - switch (nfs_async_inode_return_delegation(inode, &args->stateid)) { - case 0: - res = 0; - break; - case -ENOENT: - if (res != 0) - res = htonl(NFS4ERR_BAD_STATEID); - break; - default: - res = htonl(NFS4ERR_RESOURCE); - } - iput(inode); - } - clp = nfs_find_client_next(prev); - nfs_put_client(prev); - } while (clp != NULL); + rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + + res = htonl(NFS4ERR_BADHANDLE); + inode = nfs_delegation_find_inode(cps->clp, &args->fh); + if (inode == NULL) + goto out; + /* Set up a helper thread to actually return the delegation */ + switch (nfs_async_inode_return_delegation(inode, &args->stateid)) { + case 0: + res = 0; + break; + case -ENOENT: + if (res != 0) + res = htonl(NFS4ERR_BAD_STATEID); + break; + default: + res = htonl(NFS4ERR_RESOURCE); + } + iput(inode); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); return res; @@ -113,16 +108,149 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf #if defined(CONFIG_NFS_V4_1) +static u32 initiate_file_draining(struct nfs_client *clp, + struct cb_layoutrecallargs *args) +{ + struct pnfs_layout_hdr *lo; + struct inode *ino; + bool found = false; + u32 rv = NFS4ERR_NOMATCHING_LAYOUT; + LIST_HEAD(free_me_list); + + spin_lock(&clp->cl_lock); + list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { + if (nfs_compare_fh(&args->cbl_fh, + &NFS_I(lo->plh_inode)->fh)) + continue; + ino = igrab(lo->plh_inode); + if (!ino) + continue; + found = true; + /* Without this, layout can be freed as soon + * as we release cl_lock. + */ + get_layout_hdr(lo); + break; + } + spin_unlock(&clp->cl_lock); + if (!found) + return NFS4ERR_NOMATCHING_LAYOUT; + + spin_lock(&ino->i_lock); + if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || + mark_matching_lsegs_invalid(lo, &free_me_list, + args->cbl_range.iomode)) + rv = NFS4ERR_DELAY; + else + rv = NFS4ERR_NOMATCHING_LAYOUT; + pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); + spin_unlock(&ino->i_lock); + pnfs_free_lseg_list(&free_me_list); + put_layout_hdr(lo); + iput(ino); + return rv; +} + +static u32 initiate_bulk_draining(struct nfs_client *clp, + struct cb_layoutrecallargs *args) +{ + struct pnfs_layout_hdr *lo; + struct inode *ino; + u32 rv = NFS4ERR_NOMATCHING_LAYOUT; + struct pnfs_layout_hdr *tmp; + LIST_HEAD(recall_list); + LIST_HEAD(free_me_list); + struct pnfs_layout_range range = { + .iomode = IOMODE_ANY, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + + spin_lock(&clp->cl_lock); + list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { + if ((args->cbl_recall_type == RETURN_FSID) && + memcmp(&NFS_SERVER(lo->plh_inode)->fsid, + &args->cbl_fsid, sizeof(struct nfs_fsid))) + continue; + if (!igrab(lo->plh_inode)) + continue; + get_layout_hdr(lo); + BUG_ON(!list_empty(&lo->plh_bulk_recall)); + list_add(&lo->plh_bulk_recall, &recall_list); + } + spin_unlock(&clp->cl_lock); + list_for_each_entry_safe(lo, tmp, + &recall_list, plh_bulk_recall) { + ino = lo->plh_inode; + spin_lock(&ino->i_lock); + set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); + if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode)) + rv = NFS4ERR_DELAY; + list_del_init(&lo->plh_bulk_recall); + spin_unlock(&ino->i_lock); + put_layout_hdr(lo); + iput(ino); + } + pnfs_free_lseg_list(&free_me_list); + return rv; +} + +static u32 do_callback_layoutrecall(struct nfs_client *clp, + struct cb_layoutrecallargs *args) +{ + u32 res = NFS4ERR_DELAY; + + dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type); + if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state)) + goto out; + if (args->cbl_recall_type == RETURN_FILE) + res = initiate_file_draining(clp, args); + else + res = initiate_bulk_draining(clp, args); + clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state); +out: + dprintk("%s returning %i\n", __func__, res); + return res; + +} + +__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args, + void *dummy, struct cb_process_state *cps) +{ + u32 res; + + dprintk("%s: -->\n", __func__); + + if (cps->clp) + res = do_callback_layoutrecall(cps->clp, args); + else + res = NFS4ERR_OP_NOT_IN_SESSION; + + dprintk("%s: exit with status = %d\n", __func__, res); + return cpu_to_be32(res); +} + +static void pnfs_recall_all_layouts(struct nfs_client *clp) +{ + struct cb_layoutrecallargs args; + + /* Pretend we got a CB_LAYOUTRECALL(ALL) */ + memset(&args, 0, sizeof(args)); + args.cbl_recall_type = RETURN_ALL; + /* FIXME we ignore errors, what should we do? */ + do_callback_layoutrecall(clp, &args); +} + int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) { if (delegation == NULL) return 0; - /* seqid is 4-bytes long */ - if (((u32 *) &stateid->data)[0] != 0) + if (stateid->stateid.seqid != 0) return 0; - if (memcmp(&delegation->stateid.data[4], &stateid->data[4], - sizeof(stateid->data)-4)) + if (memcmp(&delegation->stateid.stateid.other, + &stateid->stateid.other, + NFS4_STATEID_OTHER_SIZE)) return 0; return 1; @@ -185,42 +313,6 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) } /* - * Returns a pointer to a held 'struct nfs_client' that matches the server's - * address, major version number, and session ID. It is the caller's - * responsibility to release the returned reference. - * - * Returns NULL if there are no connections with sessions, or if no session - * matches the one of interest. - */ - static struct nfs_client *find_client_with_session( - const struct sockaddr *addr, u32 nfsversion, - struct nfs4_sessionid *sessionid) -{ - struct nfs_client *clp; - - clp = nfs_find_client(addr, 4); - if (clp == NULL) - return NULL; - - do { - struct nfs_client *prev = clp; - - if (clp->cl_session != NULL) { - if (memcmp(clp->cl_session->sess_id.data, - sessionid->data, - NFS4_MAX_SESSIONID_LEN) == 0) { - /* Returns a held reference to clp */ - return clp; - } - } - clp = nfs_find_client_next(prev); - nfs_put_client(prev); - } while (clp != NULL); - - return NULL; -} - -/* * For each referring call triple, check the session's slot table for * a match. If the slot is in use and the sequence numbers match, the * client is still waiting for a response to the original request. @@ -276,20 +368,28 @@ out: } __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, - struct cb_sequenceres *res) + struct cb_sequenceres *res, + struct cb_process_state *cps) { struct nfs_client *clp; int i; - __be32 status; + __be32 status = htonl(NFS4ERR_BADSESSION); - status = htonl(NFS4ERR_BADSESSION); - clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid); + cps->clp = NULL; + + clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); if (clp == NULL) goto out; + /* state manager is resetting the session */ + if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { + status = NFS4ERR_DELAY; + goto out; + } + status = validate_seqid(&clp->cl_session->bc_slot_table, args); if (status) - goto out_putclient; + goto out; /* * Check for pending referring calls. If a match is found, a @@ -298,7 +398,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, */ if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) { status = htonl(NFS4ERR_DELAY); - goto out_putclient; + goto out; } memcpy(&res->csr_sessionid, &args->csa_sessionid, @@ -307,83 +407,93 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, res->csr_slotid = args->csa_slotid; res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + nfs4_cb_take_slot(clp); -out_putclient: - nfs_put_client(clp); out: + cps->clp = clp; /* put in nfs4_callback_compound */ for (i = 0; i < args->csa_nrclists; i++) kfree(args->csa_rclists[i].rcl_refcalls); kfree(args->csa_rclists); - if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) - res->csr_status = 0; - else + if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) { + cps->drc_status = status; + status = 0; + } else res->csr_status = status; + dprintk("%s: exit with status = %d res->csr_status %d\n", __func__, ntohl(status), ntohl(res->csr_status)); return status; } -__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy) +static bool +validate_bitmap_values(unsigned long mask) +{ + return (mask & ~RCA4_TYPE_MASK_ALL) == 0; +} + +__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy, + struct cb_process_state *cps) { - struct nfs_client *clp; __be32 status; fmode_t flags = 0; - status = htonl(NFS4ERR_OP_NOT_IN_SESSION); - clp = nfs_find_client(args->craa_addr, 4); - if (clp == NULL) + status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); + if (!cps->clp) /* set in cb_sequence */ goto out; dprintk("NFS: RECALL_ANY callback request from %s\n", - rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + + status = cpu_to_be32(NFS4ERR_INVAL); + if (!validate_bitmap_values(args->craa_type_mask)) + goto out; + status = cpu_to_be32(NFS4_OK); if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *) &args->craa_type_mask)) flags = FMODE_READ; if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *) &args->craa_type_mask)) flags |= FMODE_WRITE; - + if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *) + &args->craa_type_mask)) + pnfs_recall_all_layouts(cps->clp); if (flags) - nfs_expire_all_delegation_types(clp, flags); - status = htonl(NFS4_OK); + nfs_expire_all_delegation_types(cps->clp, flags); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; } /* Reduce the fore channel's max_slots to the target value */ -__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy) +__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, + struct cb_process_state *cps) { - struct nfs_client *clp; struct nfs4_slot_table *fc_tbl; __be32 status; status = htonl(NFS4ERR_OP_NOT_IN_SESSION); - clp = nfs_find_client(args->crsa_addr, 4); - if (clp == NULL) + if (!cps->clp) /* set in cb_sequence */ goto out; dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", - rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), + rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR), args->crsa_target_max_slots); - fc_tbl = &clp->cl_session->fc_slot_table; + fc_tbl = &cps->clp->cl_session->fc_slot_table; status = htonl(NFS4ERR_BAD_HIGH_SLOT); if (args->crsa_target_max_slots > fc_tbl->max_slots || args->crsa_target_max_slots < 1) - goto out_putclient; + goto out; status = htonl(NFS4_OK); if (args->crsa_target_max_slots == fc_tbl->max_slots) - goto out_putclient; + goto out; fc_tbl->target_max_slots = args->crsa_target_max_slots; - nfs41_handle_recall_slot(clp); -out_putclient: - nfs_put_client(clp); /* balance nfs_find_client */ + nfs41_handle_recall_slot(cps->clp); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 05af212f0ed..14e0f9371d1 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -10,8 +10,10 @@ #include <linux/nfs4.h> #include <linux/nfs_fs.h> #include <linux/slab.h> +#include <linux/sunrpc/bc_xprt.h> #include "nfs4_fs.h" #include "callback.h" +#include "internal.h" #define CB_OP_TAGLEN_MAXSZ (512) #define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ) @@ -22,6 +24,7 @@ #define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #if defined(CONFIG_NFS_V4_1) +#define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ 4 + 1 + 3) #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) @@ -33,7 +36,8 @@ /* Internal error code */ #define NFS4ERR_RESOURCE_HDR 11050 -typedef __be32 (*callback_process_op_t)(void *, void *); +typedef __be32 (*callback_process_op_t)(void *, void *, + struct cb_process_state *); typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *); typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *); @@ -160,7 +164,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound hdr->minorversion = ntohl(*p++); /* Check minor version is zero or one. */ if (hdr->minorversion <= 1) { - p++; /* skip callback_ident */ + hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */ } else { printk(KERN_WARNING "%s: NFSv4 server callback with " "illegal minor version %u!\n", @@ -220,6 +224,66 @@ out: #if defined(CONFIG_NFS_V4_1) +static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct cb_layoutrecallargs *args) +{ + __be32 *p; + __be32 status = 0; + uint32_t iomode; + + args->cbl_addr = svc_addr(rqstp); + p = read_buf(xdr, 4 * sizeof(uint32_t)); + if (unlikely(p == NULL)) { + status = htonl(NFS4ERR_BADXDR); + goto out; + } + + args->cbl_layout_type = ntohl(*p++); + /* Depite the spec's xdr, iomode really belongs in the FILE switch, + * as it is unuseable and ignored with the other types. + */ + iomode = ntohl(*p++); + args->cbl_layoutchanged = ntohl(*p++); + args->cbl_recall_type = ntohl(*p++); + + if (args->cbl_recall_type == RETURN_FILE) { + args->cbl_range.iomode = iomode; + status = decode_fh(xdr, &args->cbl_fh); + if (unlikely(status != 0)) + goto out; + + p = read_buf(xdr, 2 * sizeof(uint64_t)); + if (unlikely(p == NULL)) { + status = htonl(NFS4ERR_BADXDR); + goto out; + } + p = xdr_decode_hyper(p, &args->cbl_range.offset); + p = xdr_decode_hyper(p, &args->cbl_range.length); + status = decode_stateid(xdr, &args->cbl_stateid); + if (unlikely(status != 0)) + goto out; + } else if (args->cbl_recall_type == RETURN_FSID) { + p = read_buf(xdr, 2 * sizeof(uint64_t)); + if (unlikely(p == NULL)) { + status = htonl(NFS4ERR_BADXDR); + goto out; + } + p = xdr_decode_hyper(p, &args->cbl_fsid.major); + p = xdr_decode_hyper(p, &args->cbl_fsid.minor); + } else if (args->cbl_recall_type != RETURN_ALL) { + status = htonl(NFS4ERR_BADXDR); + goto out; + } + dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d\n", + __func__, + args->cbl_layout_type, iomode, + args->cbl_layoutchanged, args->cbl_recall_type); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} + static __be32 decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid) { @@ -574,10 +638,10 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_SEQUENCE: case OP_CB_RECALL_ANY: case OP_CB_RECALL_SLOT: + case OP_CB_LAYOUTRECALL: *op = &callback_ops[op_nr]; break; - case OP_CB_LAYOUTRECALL: case OP_CB_NOTIFY_DEVICEID: case OP_CB_NOTIFY: case OP_CB_PUSH_DELEG: @@ -593,6 +657,37 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) return htonl(NFS_OK); } +static void nfs4_callback_free_slot(struct nfs4_session *session) +{ + struct nfs4_slot_table *tbl = &session->bc_slot_table; + + spin_lock(&tbl->slot_tbl_lock); + /* + * Let the state manager know callback processing done. + * A single slot, so highest used slotid is either 0 or -1 + */ + tbl->highest_used_slotid--; + nfs4_check_drain_bc_complete(session); + spin_unlock(&tbl->slot_tbl_lock); +} + +static void nfs4_cb_free_slot(struct nfs_client *clp) +{ + if (clp && clp->cl_session) + nfs4_callback_free_slot(clp->cl_session); +} + +/* A single slot, so highest used slotid is either 0 or -1 */ +void nfs4_cb_take_slot(struct nfs_client *clp) +{ + struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table; + + spin_lock(&tbl->slot_tbl_lock); + tbl->highest_used_slotid++; + BUG_ON(tbl->highest_used_slotid != 0); + spin_unlock(&tbl->slot_tbl_lock); +} + #else /* CONFIG_NFS_V4_1 */ static __be32 @@ -601,6 +696,9 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) return htonl(NFS4ERR_MINOR_VERS_MISMATCH); } +static void nfs4_cb_free_slot(struct nfs_client *clp) +{ +} #endif /* CONFIG_NFS_V4_1 */ static __be32 @@ -621,7 +719,8 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) static __be32 process_op(uint32_t minorversion, int nop, struct svc_rqst *rqstp, struct xdr_stream *xdr_in, void *argp, - struct xdr_stream *xdr_out, void *resp, int* drc_status) + struct xdr_stream *xdr_out, void *resp, + struct cb_process_state *cps) { struct callback_op *op = &callback_ops[0]; unsigned int op_nr; @@ -644,8 +743,8 @@ static __be32 process_op(uint32_t minorversion, int nop, if (status) goto encode_hdr; - if (*drc_status) { - status = *drc_status; + if (cps->drc_status) { + status = cps->drc_status; goto encode_hdr; } @@ -653,16 +752,10 @@ static __be32 process_op(uint32_t minorversion, int nop, if (maxlen > 0 && maxlen < PAGE_SIZE) { status = op->decode_args(rqstp, xdr_in, argp); if (likely(status == 0)) - status = op->process_op(argp, resp); + status = op->process_op(argp, resp, cps); } else status = htonl(NFS4ERR_RESOURCE); - /* Only set by OP_CB_SEQUENCE processing */ - if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) { - *drc_status = status; - status = 0; - } - encode_hdr: res = encode_op_hdr(xdr_out, op_nr, status); if (unlikely(res)) @@ -681,8 +774,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r struct cb_compound_hdr_arg hdr_arg = { 0 }; struct cb_compound_hdr_res hdr_res = { NULL }; struct xdr_stream xdr_in, xdr_out; - __be32 *p; - __be32 status, drc_status = 0; + __be32 *p, status; + struct cb_process_state cps = { + .drc_status = 0, + .clp = NULL, + }; unsigned int nops = 0; dprintk("%s: start\n", __func__); @@ -696,6 +792,12 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r if (status == __constant_htonl(NFS4ERR_RESOURCE)) return rpc_garbage_args; + if (hdr_arg.minorversion == 0) { + cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); + if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) + return rpc_drop_reply; + } + hdr_res.taglen = hdr_arg.taglen; hdr_res.tag = hdr_arg.tag; if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) @@ -703,7 +805,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r while (status == 0 && nops != hdr_arg.nops) { status = process_op(hdr_arg.minorversion, nops, rqstp, - &xdr_in, argp, &xdr_out, resp, &drc_status); + &xdr_in, argp, &xdr_out, resp, &cps); nops++; } @@ -716,6 +818,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r *hdr_res.status = status; *hdr_res.nops = htonl(nops); + nfs4_cb_free_slot(cps.clp); + nfs_put_client(cps.clp); dprintk("%s: done, status = %u\n", __func__, ntohl(status)); return rpc_success; } @@ -739,6 +843,12 @@ static struct callback_op callback_ops[] = { .res_maxsize = CB_OP_RECALL_RES_MAXSZ, }, #if defined(CONFIG_NFS_V4_1) + [OP_CB_LAYOUTRECALL] = { + .process_op = (callback_process_op_t)nfs4_callback_layoutrecall, + .decode_args = + (callback_decode_arg_t)decode_layoutrecall_args, + .res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ, + }, [OP_CB_SEQUENCE] = { .process_op = (callback_process_op_t)nfs4_callback_sequence, .decode_args = (callback_decode_arg_t)decode_cb_sequence_args, diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e7340729af8..bd3ca32879e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -48,6 +48,7 @@ #include "iostat.h" #include "internal.h" #include "fscache.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_CLIENT @@ -55,6 +56,30 @@ static DEFINE_SPINLOCK(nfs_client_lock); static LIST_HEAD(nfs_client_list); static LIST_HEAD(nfs_volume_list); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); +#ifdef CONFIG_NFS_V4 +static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */ + +/* + * Get a unique NFSv4.0 callback identifier which will be used + * by the V4.0 callback service to lookup the nfs_client struct + */ +static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) +{ + int ret = 0; + + if (clp->rpc_ops->version != 4 || minorversion != 0) + return ret; +retry: + if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL)) + return -ENOMEM; + spin_lock(&nfs_client_lock); + ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident); + spin_unlock(&nfs_client_lock); + if (ret == -EAGAIN) + goto retry; + return ret; +} +#endif /* CONFIG_NFS_V4 */ /* * RPC cruft for NFS @@ -143,7 +168,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_proto = cl_init->proto; #ifdef CONFIG_NFS_V4 - INIT_LIST_HEAD(&clp->cl_delegations); + err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); + if (err) + goto error_cleanup; + spin_lock_init(&clp->cl_lock); INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); @@ -155,7 +183,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ cred = rpc_lookup_machine_cred(); if (!IS_ERR(cred)) clp->cl_machine_cred = cred; - +#if defined(CONFIG_NFS_V4_1) + INIT_LIST_HEAD(&clp->cl_layouts); +#endif nfs_fscache_get_client_cookie(clp); return clp; @@ -167,21 +197,17 @@ error_0: } #ifdef CONFIG_NFS_V4 -/* - * Clears/puts all minor version specific parts from an nfs_client struct - * reverting it to minorversion 0. - */ -static void nfs4_clear_client_minor_version(struct nfs_client *clp) -{ #ifdef CONFIG_NFS_V4_1 - if (nfs4_has_session(clp)) { +static void nfs4_shutdown_session(struct nfs_client *clp) +{ + if (nfs4_has_session(clp)) nfs4_destroy_session(clp->cl_session); - clp->cl_session = NULL; - } - - clp->cl_mvops = nfs_v4_minor_ops[0]; -#endif /* CONFIG_NFS_V4_1 */ } +#else /* CONFIG_NFS_V4_1 */ +static void nfs4_shutdown_session(struct nfs_client *clp) +{ +} +#endif /* CONFIG_NFS_V4_1 */ /* * Destroy the NFS4 callback service @@ -196,17 +222,49 @@ static void nfs4_shutdown_client(struct nfs_client *clp) { if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) nfs4_kill_renewd(clp); - nfs4_clear_client_minor_version(clp); + nfs4_shutdown_session(clp); nfs4_destroy_callback(clp); if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) nfs_idmap_delete(clp); rpc_destroy_wait_queue(&clp->cl_rpcwaitq); } + +/* idr_remove_all is not needed as all id's are removed by nfs_put_client */ +void nfs_cleanup_cb_ident_idr(void) +{ + idr_destroy(&cb_ident_idr); +} + +/* nfs_client_lock held */ +static void nfs_cb_idr_remove_locked(struct nfs_client *clp) +{ + if (clp->cl_cb_ident) + idr_remove(&cb_ident_idr, clp->cl_cb_ident); +} + +static void pnfs_init_server(struct nfs_server *server) +{ + rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); +} + #else static void nfs4_shutdown_client(struct nfs_client *clp) { } + +void nfs_cleanup_cb_ident_idr(void) +{ +} + +static void nfs_cb_idr_remove_locked(struct nfs_client *clp) +{ +} + +static void pnfs_init_server(struct nfs_server *server) +{ +} + #endif /* CONFIG_NFS_V4 */ /* @@ -245,6 +303,7 @@ void nfs_put_client(struct nfs_client *clp) if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { list_del(&clp->cl_share_link); + nfs_cb_idr_remove_locked(clp); spin_unlock(&nfs_client_lock); BUG_ON(!list_empty(&clp->cl_superblocks)); @@ -252,6 +311,7 @@ void nfs_put_client(struct nfs_client *clp) nfs_free_client(clp); } } +EXPORT_SYMBOL_GPL(nfs_put_client); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) /* @@ -359,70 +419,28 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1, return 0; } -/* - * Find a client by IP address and protocol version - * - returns NULL if no such client - */ -struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion) +/* Common match routine for v4.0 and v4.1 callback services */ +bool +nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp, + u32 minorversion) { - struct nfs_client *clp; - - spin_lock(&nfs_client_lock); - list_for_each_entry(clp, &nfs_client_list, cl_share_link) { - struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; + struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; - /* Don't match clients that failed to initialise properly */ - if (!(clp->cl_cons_state == NFS_CS_READY || - clp->cl_cons_state == NFS_CS_SESSION_INITING)) - continue; + /* Don't match clients that failed to initialise */ + if (!(clp->cl_cons_state == NFS_CS_READY || + clp->cl_cons_state == NFS_CS_SESSION_INITING)) + return false; - /* Different NFS versions cannot share the same nfs_client */ - if (clp->rpc_ops->version != nfsversion) - continue; + /* Match the version and minorversion */ + if (clp->rpc_ops->version != 4 || + clp->cl_minorversion != minorversion) + return false; - /* Match only the IP address, not the port number */ - if (!nfs_sockaddr_match_ipaddr(addr, clap)) - continue; + /* Match only the IP address, not the port number */ + if (!nfs_sockaddr_match_ipaddr(addr, clap)) + return false; - atomic_inc(&clp->cl_count); - spin_unlock(&nfs_client_lock); - return clp; - } - spin_unlock(&nfs_client_lock); - return NULL; -} - -/* - * Find a client by IP address and protocol version - * - returns NULL if no such client - */ -struct nfs_client *nfs_find_client_next(struct nfs_client *clp) -{ - struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr; - u32 nfsvers = clp->rpc_ops->version; - - spin_lock(&nfs_client_lock); - list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) { - struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; - - /* Don't match clients that failed to initialise properly */ - if (clp->cl_cons_state != NFS_CS_READY) - continue; - - /* Different NFS versions cannot share the same nfs_client */ - if (clp->rpc_ops->version != nfsvers) - continue; - - /* Match only the IP address, not the port number */ - if (!nfs_sockaddr_match_ipaddr(sap, clap)) - continue; - - atomic_inc(&clp->cl_count); - spin_unlock(&nfs_client_lock); - return clp; - } - spin_unlock(&nfs_client_lock); - return NULL; + return true; } /* @@ -601,6 +619,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, { struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { + .net = &init_net, .protocol = clp->cl_proto, .address = (struct sockaddr *)&clp->cl_addr, .addrsize = clp->cl_addrlen, @@ -635,7 +654,8 @@ static int nfs_create_rpc_client(struct nfs_client *clp, */ static void nfs_destroy_server(struct nfs_server *server) { - if (!(server->flags & NFS_MOUNT_NONLM)) + if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) || + !(server->flags & NFS_MOUNT_LOCAL_FCNTL)) nlmclnt_done(server->nlm_host); } @@ -657,7 +677,8 @@ static int nfs_start_lockd(struct nfs_server *server) if (nlm_init.nfs_version > 3) return 0; - if (server->flags & NFS_MOUNT_NONLM) + if ((server->flags & NFS_MOUNT_LOCAL_FLOCK) && + (server->flags & NFS_MOUNT_LOCAL_FCNTL)) return 0; switch (clp->cl_proto) { @@ -898,11 +919,13 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * if (server->wsize > NFS_MAX_FILE_IO_SIZE) server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + set_pnfs_layoutdriver(server, fsinfo->layouttype); + server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); - if (server->dtsize > PAGE_CACHE_SIZE) - server->dtsize = PAGE_CACHE_SIZE; + if (server->dtsize > PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES) + server->dtsize = PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES; if (server->dtsize > server->rsize) server->dtsize = server->rsize; @@ -913,6 +936,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * server->maxfilesize = fsinfo->maxfilesize; + server->time_delta = fsinfo->time_delta; + /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); } @@ -935,6 +960,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str } fsinfo.fattr = fattr; + fsinfo.layouttype = 0; error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); if (error < 0) goto out_error; @@ -976,6 +1002,27 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve target->options = source->options; } +static void nfs_server_insert_lists(struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + + spin_lock(&nfs_client_lock); + list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + +} + +static void nfs_server_remove_lists(struct nfs_server *server) +{ + spin_lock(&nfs_client_lock); + list_del_rcu(&server->client_link); + list_del(&server->master_link); + spin_unlock(&nfs_client_lock); + + synchronize_rcu(); +} + /* * Allocate and initialise a server record */ @@ -992,6 +1039,7 @@ static struct nfs_server *nfs_alloc_server(void) /* Zero out the NFS state stuff */ INIT_LIST_HEAD(&server->client_link); INIT_LIST_HEAD(&server->master_link); + INIT_LIST_HEAD(&server->delegations); atomic_set(&server->active, 0); @@ -1007,6 +1055,8 @@ static struct nfs_server *nfs_alloc_server(void) return NULL; } + pnfs_init_server(server); + return server; } @@ -1017,10 +1067,8 @@ void nfs_free_server(struct nfs_server *server) { dprintk("--> nfs_free_server()\n"); - spin_lock(&nfs_client_lock); - list_del(&server->client_link); - list_del(&server->master_link); - spin_unlock(&nfs_client_lock); + nfs_server_remove_lists(server); + unset_pnfs_layoutdriver(server); if (server->destroy != NULL) server->destroy(server); @@ -1095,11 +1143,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); - spin_lock(&nfs_client_lock); - list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); - list_add_tail(&server->master_link, &nfs_volume_list); - spin_unlock(&nfs_client_lock); - + nfs_server_insert_lists(server); server->mount_time = jiffies; nfs_free_fattr(fattr); return server; @@ -1112,6 +1156,96 @@ error: #ifdef CONFIG_NFS_V4 /* + * NFSv4.0 callback thread helper + * + * Find a client by IP address, protocol version, and minorversion + * + * Called from the pg_authenticate method. The callback identifier + * is not used as it has not been decoded. + * + * Returns NULL if no such client + */ +struct nfs_client * +nfs4_find_client_no_ident(const struct sockaddr *addr) +{ + struct nfs_client *clp; + + spin_lock(&nfs_client_lock); + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + if (nfs4_cb_match_client(addr, clp, 0) == false) + continue; + atomic_inc(&clp->cl_count); + spin_unlock(&nfs_client_lock); + return clp; + } + spin_unlock(&nfs_client_lock); + return NULL; +} + +/* + * NFSv4.0 callback thread helper + * + * Find a client by callback identifier + */ +struct nfs_client * +nfs4_find_client_ident(int cb_ident) +{ + struct nfs_client *clp; + + spin_lock(&nfs_client_lock); + clp = idr_find(&cb_ident_idr, cb_ident); + if (clp) + atomic_inc(&clp->cl_count); + spin_unlock(&nfs_client_lock); + return clp; +} + +#if defined(CONFIG_NFS_V4_1) +/* + * NFSv4.1 callback thread helper + * For CB_COMPOUND calls, find a client by IP address, protocol version, + * minorversion, and sessionID + * + * Returns NULL if no such client + */ +struct nfs_client * +nfs4_find_client_sessionid(const struct sockaddr *addr, + struct nfs4_sessionid *sid) +{ + struct nfs_client *clp; + + spin_lock(&nfs_client_lock); + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + if (nfs4_cb_match_client(addr, clp, 1) == false) + continue; + + if (!nfs4_has_session(clp)) + continue; + + /* Match sessionid*/ + if (memcmp(clp->cl_session->sess_id.data, + sid->data, NFS4_MAX_SESSIONID_LEN) != 0) + continue; + + atomic_inc(&clp->cl_count); + spin_unlock(&nfs_client_lock); + return clp; + } + spin_unlock(&nfs_client_lock); + return NULL; +} + +#else /* CONFIG_NFS_V4_1 */ + +struct nfs_client * +nfs4_find_client_sessionid(const struct sockaddr *addr, + struct nfs4_sessionid *sid) +{ + return NULL; +} +#endif /* CONFIG_NFS_V4_1 */ + +/* * Initialize the NFS4 callback service */ static int nfs4_init_callback(struct nfs_client *clp) @@ -1329,11 +1463,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) server->namelen = NFS4_MAXNAMLEN; - spin_lock(&nfs_client_lock); - list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); - list_add_tail(&server->master_link, &nfs_volume_list); - spin_unlock(&nfs_client_lock); - + nfs_server_insert_lists(server); server->mount_time = jiffies; out: nfs_free_fattr(fattr); @@ -1356,8 +1486,9 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR| - NFS_CAP_POSIX_LOCK; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK; + if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) + server->caps |= NFS_CAP_READDIRPLUS; server->options = data->options; /* Get a client record */ @@ -1537,11 +1668,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, if (error < 0) goto out_free_server; - spin_lock(&nfs_client_lock); - list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); - list_add_tail(&server->master_link, &nfs_volume_list); - spin_unlock(&nfs_client_lock); - + nfs_server_insert_lists(server); server->mount_time = jiffies; nfs_free_fattr(fattr_fsinfo); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index b9c3c43cea1..bbbc6bf5cb2 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -11,7 +11,6 @@ #include <linux/module.h> #include <linux/sched.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/spinlock.h> #include <linux/nfs4.h> @@ -24,8 +23,6 @@ static void nfs_do_free_delegation(struct nfs_delegation *delegation) { - if (delegation->cred) - put_rpccred(delegation->cred); kfree(delegation); } @@ -38,14 +35,30 @@ static void nfs_free_delegation_callback(struct rcu_head *head) static void nfs_free_delegation(struct nfs_delegation *delegation) { + if (delegation->cred) { + put_rpccred(delegation->cred); + delegation->cred = NULL; + } call_rcu(&delegation->rcu, nfs_free_delegation_callback); } +/** + * nfs_mark_delegation_referenced - set delegation's REFERENCED flag + * @delegation: delegation to process + * + */ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) { set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); } +/** + * nfs_have_delegation - check if inode has a delegation + * @inode: inode to check + * @flags: delegation types to check for + * + * Returns one if inode has the indicated delegation, otherwise zero. + */ int nfs_have_delegation(struct inode *inode, fmode_t flags) { struct nfs_delegation *delegation; @@ -71,20 +84,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ if (inode->i_flock == NULL) goto out; - /* Protect inode->i_flock using the BKL */ - lock_kernel(); + /* Protect inode->i_flock using the file locks lock */ + lock_flocks(); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; if (nfs_file_open_context(fl->fl_file) != ctx) continue; - unlock_kernel(); + unlock_flocks(); status = nfs4_lock_delegation_recall(state, fl); if (status < 0) goto out; - lock_kernel(); + lock_flocks(); } - unlock_kernel(); + unlock_flocks(); out: return status; } @@ -120,10 +133,15 @@ again: return 0; } -/* - * Set up a delegation on an inode +/** + * nfs_inode_reclaim_delegation - process a delegation reclaim request + * @inode: inode to process + * @cred: credential to use for request + * @res: new delegation state from server + * */ -void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) +void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, + struct nfs_openres *res) { struct nfs_delegation *delegation; struct rpc_cred *oldcred = NULL; @@ -176,38 +194,52 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation return inode; } -static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, - const nfs4_stateid *stateid, - struct nfs_client *clp) +static struct nfs_delegation * +nfs_detach_delegation_locked(struct nfs_inode *nfsi, + struct nfs_server *server) { struct nfs_delegation *delegation = rcu_dereference_protected(nfsi->delegation, - lockdep_is_held(&clp->cl_lock)); + lockdep_is_held(&server->nfs_client->cl_lock)); if (delegation == NULL) goto nomatch; + spin_lock(&delegation->lock); - if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data, - sizeof(delegation->stateid.data)) != 0) - goto nomatch_unlock; list_del_rcu(&delegation->super_list); delegation->inode = NULL; nfsi->delegation_state = 0; rcu_assign_pointer(nfsi->delegation, NULL); spin_unlock(&delegation->lock); return delegation; -nomatch_unlock: - spin_unlock(&delegation->lock); nomatch: return NULL; } -/* - * Set up a delegation on an inode +static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi, + struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs_delegation *delegation; + + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(nfsi, server); + spin_unlock(&clp->cl_lock); + return delegation; +} + +/** + * nfs_inode_set_delegation - set up a delegation on an inode + * @inode: inode to which delegation applies + * @cred: cred to use for subsequent delegation processing + * @res: new delegation state from server + * + * Returns zero on success, or a negative errno value. */ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = server->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation, *old_delegation; struct nfs_delegation *freeme = NULL; @@ -228,7 +260,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct spin_lock(&clp->cl_lock); old_delegation = rcu_dereference_protected(nfsi->delegation, - lockdep_is_held(&clp->cl_lock)); + lockdep_is_held(&clp->cl_lock)); if (old_delegation != NULL) { if (memcmp(&delegation->stateid, &old_delegation->stateid, sizeof(old_delegation->stateid)) == 0 && @@ -247,9 +279,9 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation = NULL; goto out; } - freeme = nfs_detach_delegation_locked(nfsi, NULL, clp); + freeme = nfs_detach_delegation_locked(nfsi, server); } - list_add_rcu(&delegation->super_list, &clp->cl_delegations); + list_add_rcu(&delegation->super_list, &server->delegations); nfsi->delegation_state = delegation->type; rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; @@ -291,73 +323,85 @@ out: return err; } -/* - * Return all delegations that have been marked for return +/** + * nfs_client_return_marked_delegations - return previously marked delegations + * @clp: nfs_client to process + * + * Returns zero on success, or a negative errno value. */ int nfs_client_return_marked_delegations(struct nfs_client *clp) { struct nfs_delegation *delegation; + struct nfs_server *server; struct inode *inode; int err = 0; restart: rcu_read_lock(); - list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { - if (!test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) - continue; - inode = nfs_delegation_grab_inode(delegation); - if (inode == NULL) - continue; - spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp); - spin_unlock(&clp->cl_lock); - rcu_read_unlock(); - if (delegation != NULL) { - filemap_flush(inode->i_mapping); - err = __nfs_inode_return_delegation(inode, delegation, 0); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + list_for_each_entry_rcu(delegation, &server->delegations, + super_list) { + if (!test_and_clear_bit(NFS_DELEGATION_RETURN, + &delegation->flags)) + continue; + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) + continue; + delegation = nfs_detach_delegation(NFS_I(inode), + server); + rcu_read_unlock(); + + if (delegation != NULL) { + filemap_flush(inode->i_mapping); + err = __nfs_inode_return_delegation(inode, + delegation, 0); + } + iput(inode); + if (!err) + goto restart; + set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); + return err; } - iput(inode); - if (!err) - goto restart; - set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); - return err; } rcu_read_unlock(); return 0; } -/* - * This function returns the delegation without reclaiming opens - * or protecting against delegation reclaims. - * It is therefore really only safe to be called from - * nfs4_clear_inode() +/** + * nfs_inode_return_delegation_noreclaim - return delegation, don't reclaim opens + * @inode: inode to process + * + * Does not protect against delegation reclaims, therefore really only safe + * to be called from nfs4_clear_inode(). */ void nfs_inode_return_delegation_noreclaim(struct inode *inode) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_server *server = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; if (rcu_access_pointer(nfsi->delegation) != NULL) { - spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(nfsi, NULL, clp); - spin_unlock(&clp->cl_lock); + delegation = nfs_detach_delegation(nfsi, server); if (delegation != NULL) nfs_do_return_delegation(inode, delegation, 0); } } +/** + * nfs_inode_return_delegation - synchronously return a delegation + * @inode: inode to process + * + * Returns zero on success, or a negative errno value. + */ int nfs_inode_return_delegation(struct inode *inode) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_server *server = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int err = 0; if (rcu_access_pointer(nfsi->delegation) != NULL) { - spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(nfsi, NULL, clp); - spin_unlock(&clp->cl_lock); + delegation = nfs_detach_delegation(nfsi, server); if (delegation != NULL) { nfs_wb_all(inode); err = __nfs_inode_return_delegation(inode, delegation, 1); @@ -366,46 +410,61 @@ int nfs_inode_return_delegation(struct inode *inode) return err; } -static void nfs_mark_return_delegation(struct nfs_client *clp, struct nfs_delegation *delegation) +static void nfs_mark_return_delegation(struct nfs_delegation *delegation) { + struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client; + set_bit(NFS_DELEGATION_RETURN, &delegation->flags); set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); } -/* - * Return all delegations associated to a super block +/** + * nfs_super_return_all_delegations - return delegations for one superblock + * @sb: sb to process + * */ void nfs_super_return_all_delegations(struct super_block *sb) { - struct nfs_client *clp = NFS_SB(sb)->nfs_client; + struct nfs_server *server = NFS_SB(sb); + struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; if (clp == NULL) return; + rcu_read_lock(); - list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { + list_for_each_entry_rcu(delegation, &server->delegations, super_list) { spin_lock(&delegation->lock); - if (delegation->inode != NULL && delegation->inode->i_sb == sb) - set_bit(NFS_DELEGATION_RETURN, &delegation->flags); + set_bit(NFS_DELEGATION_RETURN, &delegation->flags); spin_unlock(&delegation->lock); } rcu_read_unlock(); + if (nfs_client_return_marked_delegations(clp) != 0) nfs4_schedule_state_manager(clp); } -static -void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, fmode_t flags) +static void nfs_mark_return_all_delegation_types(struct nfs_server *server, + fmode_t flags) { struct nfs_delegation *delegation; - rcu_read_lock(); - list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { + list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) continue; if (delegation->type & flags) - nfs_mark_return_delegation(clp, delegation); + nfs_mark_return_delegation(delegation); } +} + +static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, + fmode_t flags) +{ + struct nfs_server *server; + + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) + nfs_mark_return_all_delegation_types(server, flags); rcu_read_unlock(); } @@ -420,19 +479,32 @@ static void nfs_delegation_run_state_manager(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } +/** + * nfs_expire_all_delegation_types + * @clp: client to process + * @flags: delegation types to expire + * + */ void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags) { nfs_client_mark_return_all_delegation_types(clp, flags); nfs_delegation_run_state_manager(clp); } +/** + * nfs_expire_all_delegations + * @clp: client to process + * + */ void nfs_expire_all_delegations(struct nfs_client *clp) { nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); } -/* - * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. +/** + * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN + * @clp: client to process + * */ void nfs_handle_cb_pathdown(struct nfs_client *clp) { @@ -441,29 +513,43 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp) nfs_client_mark_return_all_delegations(clp); } -static void nfs_client_mark_return_unreferenced_delegations(struct nfs_client *clp) +static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) { struct nfs_delegation *delegation; - rcu_read_lock(); - list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { + list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) continue; - nfs_mark_return_delegation(clp, delegation); + nfs_mark_return_delegation(delegation); } - rcu_read_unlock(); } +/** + * nfs_expire_unreferenced_delegations - Eliminate unused delegations + * @clp: nfs_client to process + * + */ void nfs_expire_unreferenced_delegations(struct nfs_client *clp) { - nfs_client_mark_return_unreferenced_delegations(clp); + struct nfs_server *server; + + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) + nfs_mark_return_unreferenced_delegations(server); + rcu_read_unlock(); + nfs_delegation_run_state_manager(clp); } -/* - * Asynchronous delegation recall! +/** + * nfs_async_inode_return_delegation - asynchronously return a delegation + * @inode: inode to process + * @stateid: state ID information from CB_RECALL arguments + * + * Returns zero on success, or a negative errno value. */ -int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) +int nfs_async_inode_return_delegation(struct inode *inode, + const nfs4_stateid *stateid) { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_delegation *delegation; @@ -475,22 +561,21 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s rcu_read_unlock(); return -ENOENT; } - - nfs_mark_return_delegation(clp, delegation); + nfs_mark_return_delegation(delegation); rcu_read_unlock(); + nfs_delegation_run_state_manager(clp); return 0; } -/* - * Retrieve the inode associated with a delegation - */ -struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle) +static struct inode * +nfs_delegation_find_inode_server(struct nfs_server *server, + const struct nfs_fh *fhandle) { struct nfs_delegation *delegation; struct inode *res = NULL; - rcu_read_lock(); - list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { + + list_for_each_entry_rcu(delegation, &server->delegations, super_list) { spin_lock(&delegation->lock); if (delegation->inode != NULL && nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { @@ -500,49 +585,121 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs if (res != NULL) break; } + return res; +} + +/** + * nfs_delegation_find_inode - retrieve the inode associated with a delegation + * @clp: client state handle + * @fhandle: filehandle from a delegation recall + * + * Returns pointer to inode matching "fhandle," or NULL if a matching inode + * cannot be found. + */ +struct inode *nfs_delegation_find_inode(struct nfs_client *clp, + const struct nfs_fh *fhandle) +{ + struct nfs_server *server; + struct inode *res = NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + res = nfs_delegation_find_inode_server(server, fhandle); + if (res != NULL) + break; + } rcu_read_unlock(); return res; } -/* - * Mark all delegations as needing to be reclaimed +static void nfs_delegation_mark_reclaim_server(struct nfs_server *server) +{ + struct nfs_delegation *delegation; + + list_for_each_entry_rcu(delegation, &server->delegations, super_list) + set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); +} + +/** + * nfs_delegation_mark_reclaim - mark all delegations as needing to be reclaimed + * @clp: nfs_client to process + * */ void nfs_delegation_mark_reclaim(struct nfs_client *clp) { - struct nfs_delegation *delegation; + struct nfs_server *server; + rcu_read_lock(); - list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) - set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) + nfs_delegation_mark_reclaim_server(server); rcu_read_unlock(); } -/* - * Reap all unclaimed delegations after reboot recovery is done +/** + * nfs_delegation_reap_unclaimed - reap unclaimed delegations after reboot recovery is done + * @clp: nfs_client to process + * */ void nfs_delegation_reap_unclaimed(struct nfs_client *clp) { struct nfs_delegation *delegation; + struct nfs_server *server; struct inode *inode; + restart: rcu_read_lock(); - list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { - if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) - continue; - inode = nfs_delegation_grab_inode(delegation); - if (inode == NULL) - continue; - spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp); - spin_unlock(&clp->cl_lock); - rcu_read_unlock(); - if (delegation != NULL) - nfs_free_delegation(delegation); - iput(inode); - goto restart; + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + list_for_each_entry_rcu(delegation, &server->delegations, + super_list) { + if (test_bit(NFS_DELEGATION_NEED_RECLAIM, + &delegation->flags) == 0) + continue; + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) + continue; + delegation = nfs_detach_delegation(NFS_I(inode), + server); + rcu_read_unlock(); + + if (delegation != NULL) + nfs_free_delegation(delegation); + iput(inode); + goto restart; + } } rcu_read_unlock(); } +/** + * nfs_delegations_present - check for existence of delegations + * @clp: client state handle + * + * Returns one if there are any nfs_delegation structures attached + * to this nfs_client. + */ +int nfs_delegations_present(struct nfs_client *clp) +{ + struct nfs_server *server; + int ret = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) + if (!list_empty(&server->delegations)) { + ret = 1; + break; + } + rcu_read_unlock(); + return ret; +} + +/** + * nfs4_copy_delegation_stateid - Copy inode's state ID information + * @dst: stateid data structure to fill in + * @inode: inode to check + * + * Returns one and fills in "dst->data" * if inode had a delegation, + * otherwise zero is returned. + */ int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 2026304bda1..d9322e490c5 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -44,6 +44,7 @@ void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); void nfs_expire_unreferenced_delegations(struct nfs_client *clp); void nfs_handle_cb_pathdown(struct nfs_client *clp); int nfs_client_return_marked_delegations(struct nfs_client *clp); +int nfs_delegations_present(struct nfs_client *clp); void nfs_delegation_mark_reclaim(struct nfs_client *clp); void nfs_delegation_reap_unclaimed(struct nfs_client *clp); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e257172d438..2c3eb33b904 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -33,11 +33,13 @@ #include <linux/namei.h> #include <linux/mount.h> #include <linux/sched.h> +#include <linux/kmemleak.h> +#include <linux/xattr.h> -#include "nfs4_fs.h" #include "delegation.h" #include "iostat.h" #include "internal.h" +#include "fscache.h" /* #define NFS_DEBUG_VERBOSE 1 */ @@ -55,6 +57,7 @@ static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); static int nfs_fsync_dir(struct file *, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); +static void nfs_readdir_clear_array(struct page*); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, @@ -80,6 +83,10 @@ const struct inode_operations nfs_dir_inode_operations = { .setattr = nfs_setattr, }; +const struct address_space_operations nfs_dir_aops = { + .freepage = nfs_readdir_clear_array, +}; + #ifdef CONFIG_NFS_V3 const struct inode_operations nfs3_dir_inode_operations = { .create = nfs_create, @@ -104,8 +111,9 @@ const struct inode_operations nfs3_dir_inode_operations = { #ifdef CONFIG_NFS_V4 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); +static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd); const struct inode_operations nfs4_dir_inode_operations = { - .create = nfs_create, + .create = nfs_open_create, .lookup = nfs_atomic_lookup, .link = nfs_link, .unlink = nfs_unlink, @@ -117,9 +125,10 @@ const struct inode_operations nfs4_dir_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, - .getxattr = nfs4_getxattr, - .setxattr = nfs4_setxattr, - .listxattr = nfs4_listxattr, + .getxattr = generic_getxattr, + .setxattr = generic_setxattr, + .listxattr = generic_listxattr, + .removexattr = generic_removexattr, }; #endif /* CONFIG_NFS_V4 */ @@ -150,51 +159,209 @@ nfs_opendir(struct inode *inode, struct file *filp) return res; } -typedef __be32 * (*decode_dirent_t)(__be32 *, struct nfs_entry *, int); +struct nfs_cache_array_entry { + u64 cookie; + u64 ino; + struct qstr string; + unsigned char d_type; +}; + +struct nfs_cache_array { + unsigned int size; + int eof_index; + u64 last_cookie; + struct nfs_cache_array_entry array[0]; +}; + +typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int); typedef struct { struct file *file; struct page *page; unsigned long page_index; - __be32 *ptr; u64 *dir_cookie; + u64 last_cookie; loff_t current_index; - struct nfs_entry *entry; decode_dirent_t decode; - int plus; + unsigned long timestamp; unsigned long gencount; - int timestamp_valid; + unsigned int cache_entry_index; + unsigned int plus:1; + unsigned int eof:1; } nfs_readdir_descriptor_t; -/* Now we cache directories properly, by stuffing the dirent - * data directly in the page cache. - * - * Inode invalidation due to refresh etc. takes care of - * _everything_, no sloppy entry flushing logic, no extraneous - * copying, network direct to page cache, the way it was meant - * to be. - * - * NOTE: Dirent information verification is done always by the - * page-in of the RPC reply, nowhere else, this simplies - * things substantially. +/* + * The caller is responsible for calling nfs_readdir_release_array(page) */ static -int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) +struct nfs_cache_array *nfs_readdir_get_array(struct page *page) +{ + void *ptr; + if (page == NULL) + return ERR_PTR(-EIO); + ptr = kmap(page); + if (ptr == NULL) + return ERR_PTR(-ENOMEM); + return ptr; +} + +static +void nfs_readdir_release_array(struct page *page) +{ + kunmap(page); +} + +/* + * we are freeing strings created by nfs_add_to_readdir_array() + */ +static +void nfs_readdir_clear_array(struct page *page) +{ + struct nfs_cache_array *array; + int i; + + array = kmap_atomic(page, KM_USER0); + for (i = 0; i < array->size; i++) + kfree(array->array[i].string.name); + kunmap_atomic(array, KM_USER0); +} + +/* + * the caller is responsible for freeing qstr.name + * when called by nfs_readdir_add_to_array, the strings will be freed in + * nfs_clear_readdir_array() + */ +static +int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len) +{ + string->len = len; + string->name = kmemdup(name, len, GFP_KERNEL); + if (string->name == NULL) + return -ENOMEM; + /* + * Avoid a kmemleak false positive. The pointer to the name is stored + * in a page cache page which kmemleak does not scan. + */ + kmemleak_not_leak(string->name); + string->hash = full_name_hash(name, len); + return 0; +} + +static +int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) +{ + struct nfs_cache_array *array = nfs_readdir_get_array(page); + struct nfs_cache_array_entry *cache_entry; + int ret; + + if (IS_ERR(array)) + return PTR_ERR(array); + + cache_entry = &array->array[array->size]; + + /* Check that this entry lies within the page bounds */ + ret = -ENOSPC; + if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE) + goto out; + + cache_entry->cookie = entry->prev_cookie; + cache_entry->ino = entry->ino; + cache_entry->d_type = entry->d_type; + ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len); + if (ret) + goto out; + array->last_cookie = entry->cookie; + array->size++; + if (entry->eof != 0) + array->eof_index = array->size; +out: + nfs_readdir_release_array(page); + return ret; +} + +static +int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) +{ + loff_t diff = desc->file->f_pos - desc->current_index; + unsigned int index; + + if (diff < 0) + goto out_eof; + if (diff >= array->size) { + if (array->eof_index >= 0) + goto out_eof; + desc->current_index += array->size; + return -EAGAIN; + } + + index = (unsigned int)diff; + *desc->dir_cookie = array->array[index].cookie; + desc->cache_entry_index = index; + return 0; +out_eof: + desc->eof = 1; + return -EBADCOOKIE; +} + +static +int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) +{ + int i; + int status = -EAGAIN; + + for (i = 0; i < array->size; i++) { + if (array->array[i].cookie == *desc->dir_cookie) { + desc->cache_entry_index = i; + return 0; + } + } + if (array->eof_index >= 0) { + status = -EBADCOOKIE; + if (*desc->dir_cookie == array->last_cookie) + desc->eof = 1; + } + return status; +} + +static +int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) +{ + struct nfs_cache_array *array; + int status; + + array = nfs_readdir_get_array(desc->page); + if (IS_ERR(array)) { + status = PTR_ERR(array); + goto out; + } + + if (*desc->dir_cookie == 0) + status = nfs_readdir_search_for_pos(array, desc); + else + status = nfs_readdir_search_for_cookie(array, desc); + + if (status == -EAGAIN) { + desc->last_cookie = array->last_cookie; + desc->page_index++; + } + nfs_readdir_release_array(desc->page); +out: + return status; +} + +/* Fill a page with xdr information before transferring to the cache page */ +static +int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, + struct nfs_entry *entry, struct file *file, struct inode *inode) { - struct file *file = desc->file; - struct inode *inode = file->f_path.dentry->d_inode; struct rpc_cred *cred = nfs_file_cred(file); unsigned long timestamp, gencount; int error; - dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", - __func__, (long long)desc->entry->cookie, - page->index); - again: timestamp = jiffies; gencount = nfs_inc_attr_generation_counter(); - error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, + error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages, NFS_SERVER(inode)->dtsize, desc->plus); if (error < 0) { /* We requested READDIRPLUS, but the server doesn't grok it */ @@ -208,199 +375,312 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) } desc->timestamp = timestamp; desc->gencount = gencount; - desc->timestamp_valid = 1; - SetPageUptodate(page); - /* Ensure consistent page alignment of the data. - * Note: assumes we have exclusive access to this mapping either - * through inode->i_mutex or some other mechanism. - */ - if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { - /* Should never happen */ - nfs_zap_mapping(inode, inode->i_mapping); - } - unlock_page(page); - return 0; - error: - unlock_page(page); - return -EIO; +error: + return error; } -static inline -int dir_decode(nfs_readdir_descriptor_t *desc) -{ - __be32 *p = desc->ptr; - p = desc->decode(p, desc->entry, desc->plus); - if (IS_ERR(p)) - return PTR_ERR(p); - desc->ptr = p; - if (desc->timestamp_valid) { - desc->entry->fattr->time_start = desc->timestamp; - desc->entry->fattr->gencount = desc->gencount; - } else - desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; +static int xdr_decode(nfs_readdir_descriptor_t *desc, + struct nfs_entry *entry, struct xdr_stream *xdr) +{ + int error; + + error = desc->decode(xdr, entry, desc->plus); + if (error) + return error; + entry->fattr->time_start = desc->timestamp; + entry->fattr->gencount = desc->gencount; return 0; } -static inline -void dir_page_release(nfs_readdir_descriptor_t *desc) +static +int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) { - kunmap(desc->page); - page_cache_release(desc->page); - desc->page = NULL; - desc->ptr = NULL; + if (dentry->d_inode == NULL) + goto different; + if (nfs_compare_fh(entry->fh, NFS_FH(dentry->d_inode)) != 0) + goto different; + return 1; +different: + return 0; } -/* - * Given a pointer to a buffer that has already been filled by a call - * to readdir, find the next entry with cookie '*desc->dir_cookie'. - * - * If the end of the buffer has been reached, return -EAGAIN, if not, - * return the offset within the buffer of the next entry to be - * read. - */ -static inline -int find_dirent(nfs_readdir_descriptor_t *desc) +static +void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) { - struct nfs_entry *entry = desc->entry; - int loop_count = 0, - status; + struct qstr filename = { + .len = entry->len, + .name = entry->name, + }; + struct dentry *dentry; + struct dentry *alias; + struct inode *dir = parent->d_inode; + struct inode *inode; - while((status = dir_decode(desc)) == 0) { - dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n", - __func__, (unsigned long long)entry->cookie); - if (entry->prev_cookie == *desc->dir_cookie) - break; - if (loop_count++ > 200) { - loop_count = 0; - schedule(); + if (filename.name[0] == '.') { + if (filename.len == 1) + return; + if (filename.len == 2 && filename.name[1] == '.') + return; + } + filename.hash = full_name_hash(filename.name, filename.len); + + dentry = d_lookup(parent, &filename); + if (dentry != NULL) { + if (nfs_same_file(dentry, entry)) { + nfs_refresh_inode(dentry->d_inode, entry->fattr); + goto out; + } else { + d_drop(dentry); + dput(dentry); } } - return status; + + dentry = d_alloc(parent, &filename); + if (dentry == NULL) + return; + + inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); + if (IS_ERR(inode)) + goto out; + + alias = d_materialise_unique(dentry, inode); + if (IS_ERR(alias)) + goto out; + else if (alias) { + nfs_set_verifier(alias, nfs_save_change_attribute(dir)); + dput(alias); + } else + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + +out: + dput(dentry); } -/* - * Given a pointer to a buffer that has already been filled by a call - * to readdir, find the entry at offset 'desc->file->f_pos'. - * - * If the end of the buffer has been reached, return -EAGAIN, if not, - * return the offset within the buffer of the next entry to be - * read. - */ -static inline -int find_dirent_index(nfs_readdir_descriptor_t *desc) +/* Perform conversion from xdr to cache array */ +static +int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, + struct page **xdr_pages, struct page *page, unsigned int buflen) { - struct nfs_entry *entry = desc->entry; - int loop_count = 0, - status; + struct xdr_stream stream; + struct xdr_buf buf = { + .pages = xdr_pages, + .page_len = buflen, + .buflen = buflen, + .len = buflen, + }; + struct page *scratch; + struct nfs_cache_array *array; + unsigned int count = 0; + int status; - for(;;) { - status = dir_decode(desc); - if (status) - break; + scratch = alloc_page(GFP_KERNEL); + if (scratch == NULL) + return -ENOMEM; - dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n", - (unsigned long long)entry->cookie, desc->current_index); + xdr_init_decode(&stream, &buf, NULL); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); - if (desc->file->f_pos == desc->current_index) { - *desc->dir_cookie = entry->cookie; + do { + status = xdr_decode(desc, entry, &stream); + if (status != 0) { + if (status == -EAGAIN) + status = 0; break; } - desc->current_index++; - if (loop_count++ > 200) { - loop_count = 0; - schedule(); - } + + count++; + + if (desc->plus != 0) + nfs_prime_dcache(desc->file->f_path.dentry, entry); + + status = nfs_readdir_add_to_array(entry, page); + if (status != 0) + break; + } while (!entry->eof); + + if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { + array = nfs_readdir_get_array(page); + if (!IS_ERR(array)) { + array->eof_index = array->size; + status = 0; + nfs_readdir_release_array(page); + } else + status = PTR_ERR(array); } + + put_page(scratch); return status; } +static +void nfs_readdir_free_pagearray(struct page **pages, unsigned int npages) +{ + unsigned int i; + for (i = 0; i < npages; i++) + put_page(pages[i]); +} + +static +void nfs_readdir_free_large_page(void *ptr, struct page **pages, + unsigned int npages) +{ + nfs_readdir_free_pagearray(pages, npages); +} + /* - * Find the given page, and call find_dirent() or find_dirent_index in - * order to try to return the next entry. + * nfs_readdir_large_page will allocate pages that must be freed with a call + * to nfs_readdir_free_large_page */ -static inline -int find_dirent_page(nfs_readdir_descriptor_t *desc) +static +int nfs_readdir_large_page(struct page **pages, unsigned int npages) { - struct inode *inode = desc->file->f_path.dentry->d_inode; - struct page *page; - int status; + unsigned int i; - dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n", - __func__, desc->page_index, - (long long) *desc->dir_cookie); + for (i = 0; i < npages; i++) { + struct page *page = alloc_page(GFP_KERNEL); + if (page == NULL) + goto out_freepages; + pages[i] = page; + } + return 0; - /* If we find the page in the page_cache, we cannot be sure - * how fresh the data is, so we will ignore readdir_plus attributes. - */ - desc->timestamp_valid = 0; - page = read_cache_page(inode->i_mapping, desc->page_index, - (filler_t *)nfs_readdir_filler, desc); - if (IS_ERR(page)) { - status = PTR_ERR(page); +out_freepages: + nfs_readdir_free_pagearray(pages, i); + return -ENOMEM; +} + +static +int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode) +{ + struct page *pages[NFS_MAX_READDIR_PAGES]; + void *pages_ptr = NULL; + struct nfs_entry entry; + struct file *file = desc->file; + struct nfs_cache_array *array; + int status = -ENOMEM; + unsigned int array_size = ARRAY_SIZE(pages); + + entry.prev_cookie = 0; + entry.cookie = desc->last_cookie; + entry.eof = 0; + entry.fh = nfs_alloc_fhandle(); + entry.fattr = nfs_alloc_fattr(); + entry.server = NFS_SERVER(inode); + if (entry.fh == NULL || entry.fattr == NULL) + goto out; + + array = nfs_readdir_get_array(page); + if (IS_ERR(array)) { + status = PTR_ERR(array); goto out; } + memset(array, 0, sizeof(struct nfs_cache_array)); + array->eof_index = -1; - /* NOTE: Someone else may have changed the READDIRPLUS flag */ - desc->page = page; - desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ - if (*desc->dir_cookie != 0) - status = find_dirent(desc); - else - status = find_dirent_index(desc); + status = nfs_readdir_large_page(pages, array_size); if (status < 0) - dir_page_release(desc); - out: - dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); + goto out_release_array; + do { + unsigned int pglen; + status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode); + + if (status < 0) + break; + pglen = status; + status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen); + if (status < 0) { + if (status == -ENOSPC) + status = 0; + break; + } + } while (array->eof_index < 0); + + nfs_readdir_free_large_page(pages_ptr, pages, array_size); +out_release_array: + nfs_readdir_release_array(page); +out: + nfs_free_fattr(entry.fattr); + nfs_free_fhandle(entry.fh); return status; } /* - * Recurse through the page cache pages, and return a - * filled nfs_entry structure of the next directory entry if possible. - * - * The target for the search is '*desc->dir_cookie' if non-0, - * 'desc->file->f_pos' otherwise + * Now we cache directories properly, by converting xdr information + * to an array that can be used for lookups later. This results in + * fewer cache pages, since we can store more information on each page. + * We only need to convert from xdr once so future lookups are much simpler */ -static inline -int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) +static +int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) { - int loop_count = 0; - int res; + struct inode *inode = desc->file->f_path.dentry->d_inode; + int ret; - /* Always search-by-index from the beginning of the cache */ - if (*desc->dir_cookie == 0) { - dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n", - (long long)desc->file->f_pos); - desc->page_index = 0; - desc->entry->cookie = desc->entry->prev_cookie = 0; - desc->entry->eof = 0; - desc->current_index = 0; - } else - dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", - (unsigned long long)*desc->dir_cookie); + ret = nfs_readdir_xdr_to_array(desc, page, inode); + if (ret < 0) + goto error; + SetPageUptodate(page); - for (;;) { - res = find_dirent_page(desc); - if (res != -EAGAIN) - break; - /* Align to beginning of next page */ - desc->page_index ++; - if (loop_count++ > 200) { - loop_count = 0; - schedule(); - } + if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { + /* Should never happen */ + nfs_zap_mapping(inode, inode->i_mapping); } + unlock_page(page); + return 0; + error: + unlock_page(page); + return ret; +} - dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, res); - return res; +static +void cache_page_release(nfs_readdir_descriptor_t *desc) +{ + if (!desc->page->mapping) + nfs_readdir_clear_array(desc->page); + page_cache_release(desc->page); + desc->page = NULL; } -static inline unsigned int dt_type(struct inode *inode) +static +struct page *get_cache_page(nfs_readdir_descriptor_t *desc) +{ + return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping, + desc->page_index, (filler_t *)nfs_readdir_filler, desc); +} + +/* + * Returns 0 if desc->dir_cookie was found on page desc->page_index + */ +static +int find_cache_page(nfs_readdir_descriptor_t *desc) { - return (inode->i_mode >> 12) & 15; + int res; + + desc->page = get_cache_page(desc); + if (IS_ERR(desc->page)) + return PTR_ERR(desc->page); + + res = nfs_readdir_search_array(desc); + if (res != 0) + cache_page_release(desc); + return res; } -static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc); +/* Search for desc->dir_cookie from the beginning of the page cache */ +static inline +int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) +{ + int res; + + if (desc->page_index == 0) { + desc->current_index = 0; + desc->last_cookie = 0; + } + do { + res = find_cache_page(desc); + } while (res == -EAGAIN); + return res; +} /* * Once we've found the start of the dirent within a page: fill 'er up... @@ -410,51 +690,38 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, filldir_t filldir) { struct file *file = desc->file; - struct nfs_entry *entry = desc->entry; - struct dentry *dentry = NULL; - u64 fileid; - int loop_count = 0, - res; - - dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", - (unsigned long long)entry->cookie); - - for(;;) { - unsigned d_type = DT_UNKNOWN; - /* Note: entry->prev_cookie contains the cookie for - * retrieving the current dirent on the server */ - fileid = entry->ino; - - /* Get a dentry if we have one */ - if (dentry != NULL) - dput(dentry); - dentry = nfs_readdir_lookup(desc); + int i = 0; + int res = 0; + struct nfs_cache_array *array = NULL; - /* Use readdirplus info */ - if (dentry != NULL && dentry->d_inode != NULL) { - d_type = dt_type(dentry->d_inode); - fileid = NFS_FILEID(dentry->d_inode); - } + array = nfs_readdir_get_array(desc->page); + if (IS_ERR(array)) { + res = PTR_ERR(array); + goto out; + } - res = filldir(dirent, entry->name, entry->len, - file->f_pos, nfs_compat_user_ino64(fileid), - d_type); - if (res < 0) - break; - file->f_pos++; - *desc->dir_cookie = entry->cookie; - if (dir_decode(desc) != 0) { - desc->page_index ++; + for (i = desc->cache_entry_index; i < array->size; i++) { + struct nfs_cache_array_entry *ent; + + ent = &array->array[i]; + if (filldir(dirent, ent->string.name, ent->string.len, + file->f_pos, nfs_compat_user_ino64(ent->ino), + ent->d_type) < 0) { + desc->eof = 1; break; } - if (loop_count++ > 200) { - loop_count = 0; - schedule(); - } + file->f_pos++; + if (i < (array->size-1)) + *desc->dir_cookie = array->array[i+1].cookie; + else + *desc->dir_cookie = array->last_cookie; } - dir_page_release(desc); - if (dentry != NULL) - dput(dentry); + if (array->eof_index >= 0) + desc->eof = 1; + + nfs_readdir_release_array(desc->page); +out: + cache_page_release(desc); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; @@ -476,12 +743,9 @@ static inline int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, filldir_t filldir) { - struct file *file = desc->file; - struct inode *inode = file->f_path.dentry->d_inode; - struct rpc_cred *cred = nfs_file_cred(file); struct page *page = NULL; int status; - unsigned long timestamp, gencount; + struct inode *inode = desc->file->f_path.dentry->d_inode; dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); @@ -491,38 +755,23 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, status = -ENOMEM; goto out; } - timestamp = jiffies; - gencount = nfs_inc_attr_generation_counter(); - status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, - *desc->dir_cookie, page, - NFS_SERVER(inode)->dtsize, - desc->plus); + + desc->page_index = 0; + desc->last_cookie = *desc->dir_cookie; desc->page = page; - desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ - if (status >= 0) { - desc->timestamp = timestamp; - desc->gencount = gencount; - desc->timestamp_valid = 1; - if ((status = dir_decode(desc)) == 0) - desc->entry->prev_cookie = *desc->dir_cookie; - } else - status = -EIO; + + status = nfs_readdir_xdr_to_array(desc, page, inode); if (status < 0) goto out_release; status = nfs_do_filldir(desc, dirent, filldir); - /* Reset read descriptor so it searches the page cache from - * the start upon the next call to readdir_search_pagecache() */ - desc->page_index = 0; - desc->entry->cookie = desc->entry->prev_cookie = 0; - desc->entry->eof = 0; out: dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; out_release: - dir_page_release(desc); + cache_page_release(desc); goto out; } @@ -536,8 +785,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct inode *inode = dentry->d_inode; nfs_readdir_descriptor_t my_desc, *desc = &my_desc; - struct nfs_entry my_entry; - int res = -ENOMEM; + int res; dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -557,57 +805,44 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); - my_entry.cookie = my_entry.prev_cookie = 0; - my_entry.eof = 0; - my_entry.fh = nfs_alloc_fhandle(); - my_entry.fattr = nfs_alloc_fattr(); - if (my_entry.fh == NULL || my_entry.fattr == NULL) - goto out_alloc_failed; - - desc->entry = &my_entry; - nfs_block_sillyrename(dentry); res = nfs_revalidate_mapping(inode, filp->f_mapping); if (res < 0) goto out; - while(!desc->entry->eof) { + do { res = readdir_search_pagecache(desc); if (res == -EBADCOOKIE) { + res = 0; /* This means either end of directory */ - if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) { + if (*desc->dir_cookie && desc->eof == 0) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc, dirent, filldir); - if (res >= 0) + if (res == 0) continue; } - res = 0; break; } if (res == -ETOOSMALL && desc->plus) { clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); nfs_zap_caches(inode); + desc->page_index = 0; desc->plus = 0; - desc->entry->eof = 0; + desc->eof = 0; continue; } if (res < 0) break; res = nfs_do_filldir(desc, dirent, filldir); - if (res < 0) { - res = 0; + if (res < 0) break; - } - } + } while (!desc->eof); out: nfs_unblock_sillyrename(dentry); if (res > 0) res = 0; -out_alloc_failed: - nfs_free_fattr(my_entry.fattr); - nfs_free_fhandle(my_entry.fh); dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n", dentry->d_parent->d_name.name, dentry->d_name.name, res); @@ -703,7 +938,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) * component of the path. * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT. */ -static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigned int mask) +static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, + unsigned int mask) { if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT)) return 0; @@ -734,7 +970,7 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) { struct nfs_server *server = NFS_SERVER(inode); - if (test_bit(NFS_INO_MOUNTPOINT, &NFS_I(inode)->flags)) + if (IS_AUTOMOUNT(inode)) return 0; if (nd != NULL) { /* VFS wants an on-the-wire revalidation */ @@ -783,7 +1019,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, * If the parent directory is seen to have changed, we throw out the * cached dentry and do a new lookup. */ -static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) +static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *dir; struct inode *inode; @@ -792,6 +1028,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) struct nfs_fattr *fattr = NULL; int error; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + parent = dget_parent(dentry); dir = parent->d_inode; nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); @@ -882,7 +1121,7 @@ out_error: /* * This is called from dput() when d_count is going to 0. */ -static int nfs_dentry_delete(struct dentry *dentry) +static int nfs_dentry_delete(const struct dentry *dentry) { dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -934,6 +1173,7 @@ const struct dentry_operations nfs_dentry_operations = { .d_revalidate = nfs_lookup_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, + .d_automount = nfs_d_automount, }; static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) @@ -953,8 +1193,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru if (dentry->d_name.len > NFS_SERVER(dir)->namelen) goto out; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; - /* * If we're doing an exclusive create, optimize away the lookup * but don't hash the dentry. @@ -982,7 +1220,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru goto out_unblock_sillyrename; } inode = nfs_fhget(dentry->d_sb, fhandle, fattr); - res = (struct dentry *)inode; + res = ERR_CAST(inode); if (IS_ERR(res)) goto out_unblock_sillyrename; @@ -1009,6 +1247,7 @@ const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs_open_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, + .d_automount = nfs_d_automount, }; /* @@ -1029,10 +1268,63 @@ static int is_atomic_open(struct nameidata *nd) return 1; } +static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd) +{ + struct path path = { + .mnt = nd->path.mnt, + .dentry = dentry, + }; + struct nfs_open_context *ctx; + struct rpc_cred *cred; + fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); + + cred = rpc_lookup_cred(); + if (IS_ERR(cred)) + return ERR_CAST(cred); + ctx = alloc_nfs_open_context(&path, cred, fmode); + put_rpccred(cred); + if (ctx == NULL) + return ERR_PTR(-ENOMEM); + return ctx; +} + +static int do_open(struct inode *inode, struct file *filp) +{ + nfs_fscache_set_inode_cookie(inode, filp); + return 0; +} + +static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx) +{ + struct file *filp; + int ret = 0; + + /* If the open_intent is for execute, we have an extra check to make */ + if (ctx->mode & FMODE_EXEC) { + ret = nfs_may_open(ctx->path.dentry->d_inode, + ctx->cred, + nd->intent.open.flags); + if (ret < 0) + goto out; + } + filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open); + if (IS_ERR(filp)) + ret = PTR_ERR(filp); + else + nfs_file_set_open_context(filp, ctx); +out: + put_nfs_open_context(ctx); + return ret; +} + static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { + struct nfs_open_context *ctx; + struct iattr attr; struct dentry *res = NULL; - int error; + struct inode *inode; + int open_flags; + int err; dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -1045,7 +1337,6 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry res = ERR_PTR(-ENAMETOOLONG); goto out; } - dentry->d_op = NFS_PROTO(dir)->dentry_ops; /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash * the dentry. */ @@ -1054,29 +1345,61 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry goto out; } + ctx = nameidata_to_nfs_open_context(dentry, nd); + res = ERR_CAST(ctx); + if (IS_ERR(ctx)) + goto out; + + open_flags = nd->intent.open.flags; + if (nd->flags & LOOKUP_CREATE) { + attr.ia_mode = nd->intent.open.create_mode; + attr.ia_valid = ATTR_MODE; + attr.ia_mode &= ~current_umask(); + } else { + open_flags &= ~(O_EXCL | O_CREAT); + attr.ia_valid = 0; + } + /* Open the file on the server */ - res = nfs4_atomic_open(dir, dentry, nd); - if (IS_ERR(res)) { - error = PTR_ERR(res); - switch (error) { + nfs_block_sillyrename(dentry->d_parent); + inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); + if (IS_ERR(inode)) { + nfs_unblock_sillyrename(dentry->d_parent); + put_nfs_open_context(ctx); + switch (PTR_ERR(inode)) { /* Make a negative dentry */ case -ENOENT: + d_add(dentry, NULL); res = NULL; goto out; /* This turned out not to be a regular file */ - case -EISDIR: case -ENOTDIR: goto no_open; case -ELOOP: if (!(nd->intent.open.flags & O_NOFOLLOW)) goto no_open; + /* case -EISDIR: */ /* case -EINVAL: */ default: + res = ERR_CAST(inode); goto out; } - } else if (res != NULL) + } + res = d_add_unique(dentry, inode); + nfs_unblock_sillyrename(dentry->d_parent); + if (res != NULL) { + dput(ctx->path.dentry); + ctx->path.dentry = dget(res); dentry = res; + } + err = nfs_intent_set_file(nd, ctx); + if (err < 0) { + if (res != NULL) + dput(res); + return ERR_PTR(err); + } out: + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); return res; no_open: return nfs_lookup(dir, dentry, nd); @@ -1085,14 +1408,21 @@ no_open: static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) { struct dentry *parent = NULL; - struct inode *inode = dentry->d_inode; + struct inode *inode; struct inode *dir; + struct nfs_open_context *ctx; int openflags, ret = 0; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; if (!is_atomic_open(nd) || d_mountpoint(dentry)) goto no_open; + parent = dget_parent(dentry); dir = parent->d_inode; + /* We can't create new files in nfs_open_revalidate(), so we * optimize away revalidation of negative dentries. */ @@ -1112,99 +1442,96 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) /* We can't create new files, or truncate existing ones here */ openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); + ctx = nameidata_to_nfs_open_context(dentry, nd); + ret = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out; /* * Note: we're not holding inode->i_mutex and so may be racing with * operations that change the directory. We therefore save the * change attribute *before* we do the RPC call. */ - ret = nfs4_open_revalidate(dir, dentry, openflags, nd); + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + switch (ret) { + case -EPERM: + case -EACCES: + case -EDQUOT: + case -ENOSPC: + case -EROFS: + goto out_put_ctx; + default: + goto out_drop; + } + } + iput(inode); + if (inode != dentry->d_inode) + goto out_drop; + + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + ret = nfs_intent_set_file(nd, ctx); + if (ret >= 0) + ret = 1; out: dput(parent); - if (!ret) - d_drop(dentry); return ret; +out_drop: + d_drop(dentry); + ret = 0; +out_put_ctx: + put_nfs_open_context(ctx); + goto out; + no_open_dput: dput(parent); no_open: return nfs_lookup_revalidate(dentry, nd); } -#endif /* CONFIG_NFSV4 */ -static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) +static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { - struct dentry *parent = desc->file->f_path.dentry; - struct inode *dir = parent->d_inode; - struct nfs_entry *entry = desc->entry; - struct dentry *dentry, *alias; - struct qstr name = { - .name = entry->name, - .len = entry->len, - }; - struct inode *inode; - unsigned long verf = nfs_save_change_attribute(dir); + struct nfs_open_context *ctx = NULL; + struct iattr attr; + int error; + int open_flags = 0; - switch (name.len) { - case 2: - if (name.name[0] == '.' && name.name[1] == '.') - return dget_parent(parent); - break; - case 1: - if (name.name[0] == '.') - return dget(parent); - } + dfprintk(VFS, "NFS: create(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - spin_lock(&dir->i_lock); - if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) { - spin_unlock(&dir->i_lock); - return NULL; - } - spin_unlock(&dir->i_lock); + attr.ia_mode = mode; + attr.ia_valid = ATTR_MODE; - name.hash = full_name_hash(name.name, name.len); - dentry = d_lookup(parent, &name); - if (dentry != NULL) { - /* Is this a positive dentry that matches the readdir info? */ - if (dentry->d_inode != NULL && - (NFS_FILEID(dentry->d_inode) == entry->ino || - d_mountpoint(dentry))) { - if (!desc->plus || entry->fh->size == 0) - return dentry; - if (nfs_compare_fh(NFS_FH(dentry->d_inode), - entry->fh) == 0) - goto out_renew; - } - /* No, so d_drop to allow one to be created */ - d_drop(dentry); - dput(dentry); - } - if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR)) - return NULL; - if (name.len > NFS_SERVER(dir)->namelen) - return NULL; - /* Note: caller is already holding the dir->i_mutex! */ - dentry = d_alloc(parent, &name); - if (dentry == NULL) - return NULL; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; - inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); - if (IS_ERR(inode)) { - dput(dentry); - return NULL; - } + if ((nd->flags & LOOKUP_CREATE) != 0) { + open_flags = nd->intent.open.flags; - alias = d_materialise_unique(dentry, inode); - if (alias != NULL) { - dput(dentry); - if (IS_ERR(alias)) - return NULL; - dentry = alias; + ctx = nameidata_to_nfs_open_context(dentry, nd); + error = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out_err_drop; } -out_renew: - nfs_set_verifier(dentry, verf); - return dentry; + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); + if (error != 0) + goto out_put_ctx; + if (ctx != NULL) { + error = nfs_intent_set_file(nd, ctx); + if (error < 0) + goto out_err; + } + return 0; +out_put_ctx: + if (ctx != NULL) + put_nfs_open_context(ctx); +out_err_drop: + d_drop(dentry); +out_err: + return error; } +#endif /* CONFIG_NFSV4 */ + /* * Code common to create, mkdir, and mknod. */ @@ -1269,7 +1596,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, if ((nd->flags & LOOKUP_CREATE) != 0) open_flags = nd->intent.open.flags; - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL); if (error != 0) goto out_err; return 0; @@ -1351,76 +1678,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) return error; } -static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) -{ - static unsigned int sillycounter; - const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; - const int countersize = sizeof(sillycounter)*2; - const int slen = sizeof(".nfs")+fileidsize+countersize-1; - char silly[slen+1]; - struct qstr qsilly; - struct dentry *sdentry; - int error = -EIO; - - dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - atomic_read(&dentry->d_count)); - nfs_inc_stats(dir, NFSIOS_SILLYRENAME); - - /* - * We don't allow a dentry to be silly-renamed twice. - */ - error = -EBUSY; - if (dentry->d_flags & DCACHE_NFSFS_RENAMED) - goto out; - - sprintf(silly, ".nfs%*.*Lx", - fileidsize, fileidsize, - (unsigned long long)NFS_FILEID(dentry->d_inode)); - - /* Return delegation in anticipation of the rename */ - nfs_inode_return_delegation(dentry->d_inode); - - sdentry = NULL; - do { - char *suffix = silly + slen - countersize; - - dput(sdentry); - sillycounter++; - sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); - - dfprintk(VFS, "NFS: trying to rename %s to %s\n", - dentry->d_name.name, silly); - - sdentry = lookup_one_len(silly, dentry->d_parent, slen); - /* - * N.B. Better to return EBUSY here ... it could be - * dangerous to delete the file while it's in use. - */ - if (IS_ERR(sdentry)) - goto out; - } while(sdentry->d_inode != NULL); /* need negative lookup */ - - qsilly.name = silly; - qsilly.len = strlen(silly); - if (dentry->d_inode) { - error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, - dir, &qsilly); - nfs_mark_for_revalidate(dentry->d_inode); - } else - error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, - dir, &qsilly); - if (!error) { - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - d_move(dentry, sdentry); - error = nfs_async_unlink(dir, dentry); - /* If we return 0 we don't unlink */ - } - dput(sdentry); -out: - return error; -} - /* * Remove a file after making sure there are no pending writes, * and after checking that the file has only one user. @@ -1471,11 +1728,9 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) > 1) { + if (dentry->d_count > 1) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); /* Start asynchronous writeout of the inode */ write_inode_now(dentry->d_inode, 0); error = nfs_sillyrename(dir, dentry); @@ -1486,7 +1741,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) need_rehash = 1; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); error = nfs_safe_remove(dentry); if (!error || error == -ENOENT) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); @@ -1580,7 +1834,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { - atomic_inc(&inode->i_count); + ihold(inode); d_add(dentry, inode); } return error; @@ -1621,7 +1875,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name, - atomic_read(&new_dentry->d_count)); + new_dentry->d_count); /* * For non-directories, check whether the target is busy and if so, @@ -1639,7 +1893,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, rehash = new_dentry; } - if (atomic_read(&new_dentry->d_count) > 2) { + if (new_dentry->d_count > 2) { int err; /* copy the target dentry's name */ @@ -1711,14 +1965,14 @@ static void nfs_access_free_list(struct list_head *head) int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { LIST_HEAD(head); - struct nfs_inode *nfsi; + struct nfs_inode *nfsi, *next; struct nfs_access_entry *cache; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) return (nr_to_scan == 0) ? 0 : -1; spin_lock(&nfs_access_lru_lock); - list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { + list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { struct inode *inode; if (nr_to_scan-- == 0) @@ -1941,11 +2195,14 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); } -int nfs_permission(struct inode *inode, int mask) +int nfs_permission(struct inode *inode, int mask, unsigned int flags) { struct rpc_cred *cred; int res = 0; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + nfs_inc_stats(inode, NFSIOS_VFSACCESS); if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) @@ -1993,7 +2250,7 @@ out: out_notsup: res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res == 0) - res = generic_permission(inode, mask, NULL); + res = generic_permission(inode, mask, flags, NULL); goto out; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 064a8096167..9943a75bb6d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -407,15 +407,18 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, pos += vec->iov_len; } + /* + * If no bytes were started, return the error, and let the + * generic layer handle the completion. + */ + if (requested_bytes == 0) { + nfs_direct_req_release(dreq); + return result < 0 ? result : -EIO; + } + if (put_dreq(dreq)) nfs_direct_complete(dreq); - - if (requested_bytes != 0) - return 0; - - if (result < 0) - return result; - return -EIO; + return 0; } static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, @@ -841,15 +844,18 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, pos += vec->iov_len; } + /* + * If no bytes were started, return the error, and let the + * generic layer handle the completion. + */ + if (requested_bytes == 0) { + nfs_direct_req_release(dreq); + return result < 0 ? result : -EIO; + } + if (put_dreq(dreq)) nfs_direct_write_complete(dreq, dreq->inode); - - if (requested_bytes != 0) - return 0; - - if (result < 0) - return result; - return -EIO; + return 0; } static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, @@ -867,13 +873,13 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, goto out; nfs_alloc_commit_data(dreq); - if (dreq->commit_data == NULL || count < wsize) + if (dreq->commit_data == NULL || count <= wsize) sync = NFS_FILE_SYNC; dreq->inode = inode; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); dreq->l_ctx = nfs_get_lock_context(dreq->ctx); - if (dreq->l_ctx != NULL) + if (dreq->l_ctx == NULL) goto out_release; if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index dba50a5625d..a6e711ad130 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -167,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd, return 0; } item = container_of(h, struct nfs_dns_ent, h); - ttl = (long)item->h.expiry_time - (long)get_seconds(); + ttl = item->h.expiry_time - seconds_since_boot(); if (ttl < 0) ttl = 0; @@ -239,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) ttl = get_expiry(&buf); if (ttl == 0) goto out; - key.h.expiry_time = ttl + get_seconds(); + key.h.expiry_time = ttl + seconds_since_boot(); ret = -ENOMEM; item = nfs_dns_lookup(cd, &key); @@ -301,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd, goto out_err; ret = -ETIMEDOUT; if (!test_bit(CACHE_VALID, &(*item)->h.flags) - || (*item)->h.expiry_time < get_seconds() + || (*item)->h.expiry_time < seconds_since_boot() || cd->flush_time > (*item)->h.last_refresh) goto out_put; ret = -ENOENT; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 05bf3c0dc75..7bf029ef408 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -36,6 +36,7 @@ #include "internal.h" #include "iostat.h" #include "fscache.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -386,6 +387,10 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, file->f_path.dentry->d_name.name, mapping->host->i_ino, len, (long long) pos); + pnfs_update_layout(mapping->host, + nfs_file_open_context(file), + IOMODE_RW); + start: /* * Prevent starvation issues if someone is doing a consistency @@ -551,7 +556,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct file *filp = vma->vm_file; struct dentry *dentry = filp->f_path.dentry; unsigned pagelen; - int ret = -EINVAL; + int ret = VM_FAULT_NOPAGE; struct address_space *mapping; dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", @@ -567,21 +572,20 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (mapping != dentry->d_inode->i_mapping) goto out_unlock; - ret = 0; pagelen = nfs_page_length(page); if (pagelen == 0) goto out_unlock; - ret = nfs_flush_incompatible(filp, page); - if (ret != 0) - goto out_unlock; + ret = VM_FAULT_LOCKED; + if (nfs_flush_incompatible(filp, page) == 0 && + nfs_updatepage(filp, page, 0, pagelen) == 0) + goto out; - ret = nfs_updatepage(filp, page, 0, pagelen); + ret = VM_FAULT_SIGBUS; out_unlock: - if (!ret) - return VM_FAULT_LOCKED; unlock_page(page); - return VM_FAULT_SIGBUS; +out: + return ret; } static const struct vm_operations_struct nfs_file_vm_ops = { @@ -684,10 +688,12 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, return ret; } -static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) +static int +do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; int status = 0; + unsigned int saved_type = fl->fl_type; /* Try local locking first */ posix_test_lock(filp, fl); @@ -695,11 +701,12 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) /* found a conflict */ goto out; } + fl->fl_type = saved_type; if (nfs_have_delegation(inode, FMODE_READ)) goto out_noconflict; - if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) + if (is_local) goto out_noconflict; status = NFS_PROTO(inode)->lock(filp, cmd, fl); @@ -726,7 +733,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) return res; } -static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) +static int +do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; int status; @@ -741,15 +749,24 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) * If we're signalled while cleaning up locks on process exit, we * still need to complete the unlock. */ - /* Use local locking if mounted with "-onolock" */ - if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) + /* + * Use local locking if mounted with "-onolock" or with appropriate + * "-olocal_lock=" + */ + if (!is_local) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else status = do_vfs_lock(filp, fl); return status; } -static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) +static int +is_time_granular(struct timespec *ts) { + return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000)); +} + +static int +do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; int status; @@ -762,20 +779,31 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) if (status != 0) goto out; - /* Use local locking if mounted with "-onolock" */ - if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) + /* + * Use local locking if mounted with "-onolock" or with appropriate + * "-olocal_lock=" + */ + if (!is_local) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else status = do_vfs_lock(filp, fl); if (status < 0) goto out; + /* - * Make sure we clear the cache whenever we try to get the lock. + * Revalidate the cache if the server has time stamps granular + * enough to detect subsecond changes. Otherwise, clear the + * cache to prevent missing any changes. + * * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - if (!nfs_have_delegation(inode, FMODE_READ)) - nfs_zap_caches(inode); + if (!nfs_have_delegation(inode, FMODE_READ)) { + if (is_time_granular(&NFS_SERVER(inode)->time_delta)) + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + else + nfs_zap_caches(inode); + } out: return status; } @@ -787,6 +815,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; int ret = -ENOLCK; + int is_local = 0; dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", filp->f_path.dentry->d_parent->d_name.name, @@ -800,6 +829,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) goto out_err; + if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL) + is_local = 1; + if (NFS_PROTO(inode)->lock_check_bounds != NULL) { ret = NFS_PROTO(inode)->lock_check_bounds(fl); if (ret < 0) @@ -807,11 +839,11 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) } if (IS_GETLK(cmd)) - ret = do_getlk(filp, cmd, fl); + ret = do_getlk(filp, cmd, fl, is_local); else if (fl->fl_type == F_UNLCK) - ret = do_unlk(filp, cmd, fl); + ret = do_unlk(filp, cmd, fl, is_local); else - ret = do_setlk(filp, cmd, fl); + ret = do_setlk(filp, cmd, fl, is_local); out_err: return ret; } @@ -821,6 +853,9 @@ out_err: */ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) { + struct inode *inode = filp->f_mapping->host; + int is_local = 0; + dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", filp->f_path.dentry->d_parent->d_name.name, filp->f_path.dentry->d_name.name, @@ -829,14 +864,17 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; + if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) + is_local = 1; + /* We're simulating flock() locks using posix locks on the server */ fl->fl_owner = (fl_owner_t)filp; fl->fl_start = 0; fl->fl_end = OFFSET_MAX; if (fl->fl_type == F_UNLCK) - return do_unlk(filp, cmd, fl); - return do_setlk(filp, cmd, fl); + return do_unlk(filp, cmd, fl, is_local); + return do_setlk(filp, cmd, fl, is_local); } /* @@ -848,6 +886,5 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) dprintk("NFS: setlease(%s/%s, arg=%ld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, arg); - return -EINVAL; } diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index a70e446e160..b5ffe8fa291 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -54,8 +54,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i iput(inode); return -ENOMEM; } - /* Circumvent igrab(): we know the inode is not being freed */ - atomic_inc(&inode->i_count); + ihold(inode); /* * Ensure that this dentry is invisible to d_find_alias(). * Otherwise, it may be spliced into the tree by @@ -64,9 +63,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i * This again causes shrink_dcache_for_umount_subtree() to * Oops, since the test for IS_ROOT() will fail. */ - spin_lock(&dcache_lock); + spin_lock(&sb->s_root->d_inode->i_lock); + spin_lock(&sb->s_root->d_lock); list_del_init(&sb->s_root->d_alias); - spin_unlock(&dcache_lock); + spin_unlock(&sb->s_root->d_lock); + spin_unlock(&sb->s_root->d_inode->i_lock); } return 0; } @@ -118,9 +119,6 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) } security_d_instantiate(ret, inode); - - if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; out: nfs_free_fattr(fsinfo.fattr); return ret; @@ -226,9 +224,6 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) security_d_instantiate(ret, inode); - if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; - out: nfs_free_fattr(fattr); dprintk("<-- nfs4_get_root()\n"); diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 21a84d45916..18696882f1c 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -34,6 +34,212 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifdef CONFIG_NFS_USE_NEW_IDMAPPER + +#include <linux/slab.h> +#include <linux/cred.h> +#include <linux/nfs_idmap.h> +#include <linux/keyctl.h> +#include <linux/key-type.h> +#include <linux/rcupdate.h> +#include <linux/kernel.h> +#include <linux/err.h> + +#include <keys/user-type.h> + +#define NFS_UINT_MAXLEN 11 + +const struct cred *id_resolver_cache; + +struct key_type key_type_id_resolver = { + .name = "id_resolver", + .instantiate = user_instantiate, + .match = user_match, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = user_describe, + .read = user_read, +}; + +int nfs_idmap_init(void) +{ + struct cred *cred; + struct key *keyring; + int ret = 0; + + printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name); + + cred = prepare_kernel_cred(NULL); + if (!cred) + return -ENOMEM; + + keyring = key_alloc(&key_type_keyring, ".id_resolver", 0, 0, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA); + if (IS_ERR(keyring)) { + ret = PTR_ERR(keyring); + goto failed_put_cred; + } + + ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); + if (ret < 0) + goto failed_put_key; + + ret = register_key_type(&key_type_id_resolver); + if (ret < 0) + goto failed_put_key; + + cred->thread_keyring = keyring; + cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + id_resolver_cache = cred; + return 0; + +failed_put_key: + key_put(keyring); +failed_put_cred: + put_cred(cred); + return ret; +} + +void nfs_idmap_quit(void) +{ + key_revoke(id_resolver_cache->thread_keyring); + unregister_key_type(&key_type_id_resolver); + put_cred(id_resolver_cache); +} + +/* + * Assemble the description to pass to request_key() + * This function will allocate a new string and update dest to point + * at it. The caller is responsible for freeing dest. + * + * On error 0 is returned. Otherwise, the length of dest is returned. + */ +static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen, + const char *type, size_t typelen, char **desc) +{ + char *cp; + size_t desclen = typelen + namelen + 2; + + *desc = kmalloc(desclen, GFP_KERNEL); + if (!*desc) + return -ENOMEM; + + cp = *desc; + memcpy(cp, type, typelen); + cp += typelen; + *cp++ = ':'; + + memcpy(cp, name, namelen); + cp += namelen; + *cp = '\0'; + return desclen; +} + +static ssize_t nfs_idmap_request_key(const char *name, size_t namelen, + const char *type, void *data, size_t data_size) +{ + const struct cred *saved_cred; + struct key *rkey; + char *desc; + struct user_key_payload *payload; + ssize_t ret; + + ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc); + if (ret <= 0) + goto out; + + saved_cred = override_creds(id_resolver_cache); + rkey = request_key(&key_type_id_resolver, desc, ""); + revert_creds(saved_cred); + kfree(desc); + if (IS_ERR(rkey)) { + ret = PTR_ERR(rkey); + goto out; + } + + rcu_read_lock(); + rkey->perm |= KEY_USR_VIEW; + + ret = key_validate(rkey); + if (ret < 0) + goto out_up; + + payload = rcu_dereference(rkey->payload.data); + if (IS_ERR_OR_NULL(payload)) { + ret = PTR_ERR(payload); + goto out_up; + } + + ret = payload->datalen; + if (ret > 0 && ret <= data_size) + memcpy(data, payload->data, ret); + else + ret = -EINVAL; + +out_up: + rcu_read_unlock(); + key_put(rkey); +out: + return ret; +} + + +/* ID -> Name */ +static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen) +{ + char id_str[NFS_UINT_MAXLEN]; + int id_len; + ssize_t ret; + + id_len = snprintf(id_str, sizeof(id_str), "%u", id); + ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen); + if (ret < 0) + return -EINVAL; + return ret; +} + +/* Name -> ID */ +static int nfs_idmap_lookup_id(const char *name, size_t namelen, + const char *type, __u32 *id) +{ + char id_str[NFS_UINT_MAXLEN]; + long id_long; + ssize_t data_size; + int ret = 0; + + data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN); + if (data_size <= 0) { + ret = -EINVAL; + } else { + ret = strict_strtol(id_str, 10, &id_long); + *id = (__u32)id_long; + } + return ret; +} + +int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) +{ + return nfs_idmap_lookup_id(name, namelen, "uid", uid); +} + +int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid) +{ + return nfs_idmap_lookup_id(name, namelen, "gid", gid); +} + +int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) +{ + return nfs_idmap_lookup_name(uid, "user", buf, buflen); +} +int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen) +{ + return nfs_idmap_lookup_name(gid, "group", buf, buflen); +} + +#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */ + #include <linux/module.h> #include <linux/mutex.h> #include <linux/init.h> @@ -503,16 +709,17 @@ int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namele return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); } -int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf) +int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); } -int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf) +int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); } +#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7d2d6c72aa7..1cc600e77bb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -48,6 +48,7 @@ #include "internal.h" #include "fscache.h" #include "dns_resolve.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -234,9 +235,6 @@ nfs_init_locked(struct inode *inode, void *opaque) return 0; } -/* Don't use READDIRPLUS on directories that we believe are too large */ -#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE) - /* * This is our front-end to iget that looks up inodes by file handle * instead of inode number. @@ -291,8 +289,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) } else if (S_ISDIR(inode->i_mode)) { inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; - if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) - && fattr->size <= NFS_LIMIT_READDIRPLUS) + inode->i_data.a_ops = &nfs_dir_aops; + if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); /* Deal with crossing mountpoints */ if ((fattr->valid & NFS_ATTR_FATTR_FSID) @@ -302,7 +300,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) else inode->i_op = &nfs_mountpoint_inode_operations; inode->i_fop = NULL; - set_bit(NFS_INO_MOUNTPOINT, &nfsi->flags); + inode->i_flags |= S_AUTOMOUNT; } } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; @@ -623,7 +621,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) nfs_revalidate_inode(server, inode); } -static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred) +struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode) { struct nfs_open_context *ctx; @@ -633,11 +631,13 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct path_get(&ctx->path); ctx->cred = get_rpccred(cred); ctx->state = NULL; + ctx->mode = f_mode; ctx->flags = 0; ctx->error = 0; ctx->dir_cookie = 0; nfs_init_lock_context(&ctx->lock_context); ctx->lock_context.open_context = ctx; + INIT_LIST_HEAD(&ctx->list); } return ctx; } @@ -653,11 +653,15 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) { struct inode *inode = ctx->path.dentry->d_inode; - if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) + if (!list_empty(&ctx->list)) { + if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) + return; + list_del(&ctx->list); + spin_unlock(&inode->i_lock); + } else if (!atomic_dec_and_test(&ctx->lock_context.count)) return; - list_del(&ctx->list); - spin_unlock(&inode->i_lock); - NFS_PROTO(inode)->close_context(ctx, is_sync); + if (inode != NULL) + NFS_PROTO(inode)->close_context(ctx, is_sync); if (ctx->cred != NULL) put_rpccred(ctx->cred); path_put(&ctx->path); @@ -673,7 +677,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) * Ensure that mmap has a recent RPC credential for use when writing out * shared pages */ -static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) +void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) { struct inode *inode = filp->f_path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); @@ -730,11 +734,10 @@ int nfs_open(struct inode *inode, struct file *filp) cred = rpc_lookup_cred(); if (IS_ERR(cred)) return PTR_ERR(cred); - ctx = alloc_nfs_open_context(&filp->f_path, cred); + ctx = alloc_nfs_open_context(&filp->f_path, cred, filp->f_mode); put_rpccred(cred); if (ctx == NULL) return -ENOMEM; - ctx->mode = filp->f_mode; nfs_file_set_open_context(filp, ctx); put_nfs_open_context(ctx); nfs_fscache_set_inode_cookie(inode, filp); @@ -878,9 +881,10 @@ out: return ret; } -static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) +static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); + unsigned long ret = 0; if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) @@ -888,25 +892,32 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->change_attr = fattr->change_attr; if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; + ret |= NFS_INO_INVALID_ATTR; } /* If we have atomic WCC data, we may update some attributes */ if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) && (fattr->valid & NFS_ATTR_FATTR_CTIME) - && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + ret |= NFS_INO_INVALID_ATTR; + } if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) && (fattr->valid & NFS_ATTR_FATTR_MTIME) && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { - memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - if (S_ISDIR(inode->i_mode)) - nfsi->cache_validity |= NFS_INO_INVALID_DATA; + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + if (S_ISDIR(inode->i_mode)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA; + ret |= NFS_INO_INVALID_ATTR; } if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) - && nfsi->npages == 0) - i_size_write(inode, nfs_size_to_loff_t(fattr->size)); + && nfsi->npages == 0) { + i_size_write(inode, nfs_size_to_loff_t(fattr->size)); + ret |= NFS_INO_INVALID_ATTR; + } + return ret; } /** @@ -1205,7 +1216,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Update the fsid? */ if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) && !nfs_fsid_equal(&server->fsid, &fattr->fsid) && - !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags)) + !IS_AUTOMOUNT(inode)) server->fsid = fattr->fsid; /* @@ -1220,7 +1231,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_REVAL_PAGECACHE); /* Do atomic weak cache consistency updates */ - nfs_wcc_update_inode(inode, fattr); + invalid |= nfs_wcc_update_inode(inode, fattr); /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { @@ -1407,6 +1418,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ void nfs4_evict_inode(struct inode *inode) { + pnfs_destroy_layout(NFS_I(inode)); truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); /* If we are holding a delegation, return it! */ @@ -1434,11 +1446,18 @@ struct inode *nfs_alloc_inode(struct super_block *sb) return &nfsi->vfs_inode; } -void nfs_destroy_inode(struct inode *inode) +static void nfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } +void nfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, nfs_i_callback); +} + static inline void nfs4_init_once(struct nfs_inode *nfsi) { #ifdef CONFIG_NFS_V4 @@ -1446,6 +1465,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) nfsi->delegation = NULL; nfsi->delegation_state = 0; init_rwsem(&nfsi->rwsem); + nfsi->layout = NULL; #endif } @@ -1493,7 +1513,7 @@ static int nfsiod_start(void) { struct workqueue_struct *wq; dprintk("RPC: creating workqueue nfsiod\n"); - wq = create_singlethread_workqueue("nfsiod"); + wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0); if (wq == NULL) return -ENOMEM; nfsiod_workqueue = wq; @@ -1521,6 +1541,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_idmap_init(); + if (err < 0) + goto out9; + err = nfs_dns_resolver_init(); if (err < 0) goto out8; @@ -1585,6 +1609,8 @@ out6: out7: nfs_dns_resolver_destroy(); out8: + nfs_idmap_quit(); +out9: return err; } @@ -1597,9 +1623,11 @@ static void __exit exit_nfs_fs(void) nfs_destroy_nfspagecache(); nfs_fscache_unregister(); nfs_dns_resolver_destroy(); + nfs_idmap_quit(); #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif + nfs_cleanup_cb_ident_idr(); unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index c961bc92c10..cf9fdbdabc6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -63,6 +63,12 @@ struct nfs_clone_mount { #define NFS_UNSPEC_PORT (-1) /* + * Maximum number of pages that readdir can use for creating + * a vmapped array of pages. + */ +#define NFS_MAX_READDIR_PAGES 8 + +/* * In-kernel mount arguments */ struct nfs_parsed_mount_data { @@ -122,9 +128,12 @@ extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ extern struct rpc_program nfs_program; +extern void nfs_cleanup_cb_ident_idr(void); extern void nfs_put_client(struct nfs_client *); -extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32); -extern struct nfs_client *nfs_find_client_next(struct nfs_client *); +extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); +extern struct nfs_client *nfs4_find_client_ident(int); +extern struct nfs_client * +nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); extern struct nfs_server *nfs_create_server( const struct nfs_parsed_mount_data *, struct nfs_fh *); @@ -179,17 +188,20 @@ extern int __init nfs_init_directcache(void); extern void nfs_destroy_directcache(void); /* nfs2xdr.c */ -extern int nfs_stat_to_errno(int); +extern int nfs_stat_to_errno(enum nfs_stat); extern struct rpc_procinfo nfs_procedures[]; -extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int); +extern int nfs2_decode_dirent(struct xdr_stream *, + struct nfs_entry *, int); /* nfs3xdr.c */ extern struct rpc_procinfo nfs3_procedures[]; -extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); +extern int nfs3_decode_dirent(struct xdr_stream *, + struct nfs_entry *, int); /* nfs4xdr.c */ #ifdef CONFIG_NFS_V4 -extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); +extern int nfs4_decode_dirent(struct xdr_stream *, + struct nfs_entry *, int); #endif #ifdef CONFIG_NFS_V4_1 extern const u32 nfs41_maxread_overhead; @@ -239,6 +251,7 @@ extern char *nfs_path(const char *base, const struct dentry *droot, const struct dentry *dentry, char *buffer, ssize_t buflen); +extern struct vfsmount *nfs_d_automount(struct path *path); /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *); @@ -356,6 +369,15 @@ unsigned int nfs_page_length(struct page *page) } /* + * Convert a umode to a dirent->d_type + */ +static inline +unsigned char nfs_umode_to_dtype(umode_t mode) +{ + return (mode >> 12) & 15; +} + +/* * Determine the number of pages in an array of length 'len' and * with a base offset of 'base' */ diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 59047f8d7d7..d4c2d6b7507 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -153,6 +153,7 @@ int nfs_mount(struct nfs_mount_request *info) .rpc_resp = &result, }; struct rpc_create_args args = { + .net = &init_net, .protocol = info->protocol, .address = info->sap, .addrsize = info->salen, @@ -224,6 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info) .to_retries = 2, }; struct rpc_create_args args = { + .net = &init_net, .protocol = IPPROTO_UDP, .address = info->sap, .addrsize = info->salen, @@ -234,10 +236,8 @@ void nfs_umount(const struct nfs_mount_request *info) .authflavor = RPC_AUTH_UNIX, .flags = RPC_CLNT_CREATE_NOPING, }; - struct mountres result; struct rpc_message msg = { .rpc_argp = info->dirpath, - .rpc_resp = &result, }; struct rpc_clnt *clnt; int status; @@ -246,7 +246,7 @@ void nfs_umount(const struct nfs_mount_request *info) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; clnt = rpc_create(&args); - if (unlikely(IS_ERR(clnt))) + if (IS_ERR(clnt)) goto out_clnt_err; dprintk("NFS: sending UMNT request for %s:%s\n", @@ -278,29 +278,20 @@ out_call_err: * XDR encode/decode functions for MOUNT */ -static int encode_mntdirpath(struct xdr_stream *xdr, const char *pathname) +static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname) { const u32 pathname_len = strlen(pathname); __be32 *p; - if (unlikely(pathname_len > MNTPATHLEN)) - return -EIO; - - p = xdr_reserve_space(xdr, sizeof(u32) + pathname_len); - if (unlikely(p == NULL)) - return -EIO; + BUG_ON(pathname_len > MNTPATHLEN); + p = xdr_reserve_space(xdr, 4 + pathname_len); xdr_encode_opaque(p, pathname, pathname_len); - - return 0; } -static int mnt_enc_dirpath(struct rpc_rqst *req, __be32 *p, - const char *dirpath) +static void mnt_xdr_enc_dirpath(struct rpc_rqst *req, struct xdr_stream *xdr, + const char *dirpath) { - struct xdr_stream xdr; - - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - return encode_mntdirpath(&xdr, dirpath); + encode_mntdirpath(xdr, dirpath); } /* @@ -318,10 +309,10 @@ static int decode_status(struct xdr_stream *xdr, struct mountres *res) u32 status; __be32 *p; - p = xdr_inline_decode(xdr, sizeof(status)); + p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) return -EIO; - status = ntohl(*p); + status = be32_to_cpup(p); for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) { if (mnt_errtbl[i].status == status) { @@ -349,18 +340,16 @@ static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res) return 0; } -static int mnt_dec_mountres(struct rpc_rqst *req, __be32 *p, - struct mountres *res) +static int mnt_xdr_dec_mountres(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct mountres *res) { - struct xdr_stream xdr; int status; - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - - status = decode_status(&xdr, res); + status = decode_status(xdr, res); if (unlikely(status != 0 || res->errno != 0)) return status; - return decode_fhandle(&xdr, res); + return decode_fhandle(xdr, res); } static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res) @@ -369,10 +358,10 @@ static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res) u32 status; __be32 *p; - p = xdr_inline_decode(xdr, sizeof(status)); + p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) return -EIO; - status = ntohl(*p); + status = be32_to_cpup(p); for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) { if (mnt3_errtbl[i].status == status) { @@ -392,11 +381,11 @@ static int decode_fhandle3(struct xdr_stream *xdr, struct mountres *res) u32 size; __be32 *p; - p = xdr_inline_decode(xdr, sizeof(size)); + p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) return -EIO; - size = ntohl(*p++); + size = be32_to_cpup(p); if (size > NFS3_FHSIZE || size == 0) return -EIO; @@ -419,15 +408,15 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res) if (*count == 0) return 0; - p = xdr_inline_decode(xdr, sizeof(entries)); + p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) return -EIO; - entries = ntohl(*p); + entries = be32_to_cpup(p); dprintk("NFS: received %u auth flavors\n", entries); if (entries > NFS_MAX_SECFLAVORS) entries = NFS_MAX_SECFLAVORS; - p = xdr_inline_decode(xdr, sizeof(u32) * entries); + p = xdr_inline_decode(xdr, 4 * entries); if (unlikely(p == NULL)) return -EIO; @@ -435,38 +424,36 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res) entries = *count; for (i = 0; i < entries; i++) { - flavors[i] = ntohl(*p++); - dprintk("NFS:\tflavor %u: %d\n", i, flavors[i]); + flavors[i] = be32_to_cpup(p++); + dprintk("NFS: auth flavor[%u]: %d\n", i, flavors[i]); } *count = i; return 0; } -static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p, - struct mountres *res) +static int mnt_xdr_dec_mountres3(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct mountres *res) { - struct xdr_stream xdr; int status; - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - - status = decode_fhs_status(&xdr, res); + status = decode_fhs_status(xdr, res); if (unlikely(status != 0 || res->errno != 0)) return status; - status = decode_fhandle3(&xdr, res); + status = decode_fhandle3(xdr, res); if (unlikely(status != 0)) { res->errno = -EBADHANDLE; return 0; } - return decode_auth_flavors(&xdr, res); + return decode_auth_flavors(xdr, res); } static struct rpc_procinfo mnt_procedures[] = { [MOUNTPROC_MNT] = { .p_proc = MOUNTPROC_MNT, - .p_encode = (kxdrproc_t)mnt_enc_dirpath, - .p_decode = (kxdrproc_t)mnt_dec_mountres, + .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath, + .p_decode = (kxdrdproc_t)mnt_xdr_dec_mountres, .p_arglen = MNT_enc_dirpath_sz, .p_replen = MNT_dec_mountres_sz, .p_statidx = MOUNTPROC_MNT, @@ -474,7 +461,7 @@ static struct rpc_procinfo mnt_procedures[] = { }, [MOUNTPROC_UMNT] = { .p_proc = MOUNTPROC_UMNT, - .p_encode = (kxdrproc_t)mnt_enc_dirpath, + .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath, .p_arglen = MNT_enc_dirpath_sz, .p_statidx = MOUNTPROC_UMNT, .p_name = "UMOUNT", @@ -484,8 +471,8 @@ static struct rpc_procinfo mnt_procedures[] = { static struct rpc_procinfo mnt3_procedures[] = { [MOUNTPROC3_MNT] = { .p_proc = MOUNTPROC3_MNT, - .p_encode = (kxdrproc_t)mnt_enc_dirpath, - .p_decode = (kxdrproc_t)mnt_dec_mountres3, + .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath, + .p_decode = (kxdrdproc_t)mnt_xdr_dec_mountres3, .p_arglen = MNT_enc_dirpath_sz, .p_replen = MNT_dec_mountres3_sz, .p_statidx = MOUNTPROC3_MNT, @@ -493,7 +480,7 @@ static struct rpc_procinfo mnt3_procedures[] = { }, [MOUNTPROC3_UMNT] = { .p_proc = MOUNTPROC3_UMNT, - .p_encode = (kxdrproc_t)mnt_enc_dirpath, + .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath, .p_arglen = MNT_enc_dirpath_sz, .p_statidx = MOUNTPROC3_UMNT, .p_name = "UMOUNT", @@ -503,13 +490,13 @@ static struct rpc_procinfo mnt3_procedures[] = { static struct rpc_version mnt_version1 = { .number = 1, - .nrprocs = 2, + .nrprocs = ARRAY_SIZE(mnt_procedures), .procs = mnt_procedures, }; static struct rpc_version mnt_version3 = { .number = 3, - .nrprocs = 2, + .nrprocs = ARRAY_SIZE(mnt3_procedures), .procs = mnt3_procedures, }; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index db6aa3673cf..f32b8603dca 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -49,12 +49,17 @@ char *nfs_path(const char *base, const struct dentry *dentry, char *buffer, ssize_t buflen) { - char *end = buffer+buflen; + char *end; int namelen; + unsigned seq; +rename_retry: + end = buffer+buflen; *--end = '\0'; buflen--; - spin_lock(&dcache_lock); + + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); while (!IS_ROOT(dentry) && dentry != droot) { namelen = dentry->d_name.len; buflen -= namelen + 1; @@ -65,7 +70,9 @@ char *nfs_path(const char *base, *--end = '/'; dentry = dentry->d_parent; } - spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; if (*end != '/') { if (--buflen < 0) goto Elong; @@ -82,15 +89,16 @@ char *nfs_path(const char *base, memcpy(end, base, namelen); return end; Elong_unlock: - spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; Elong: return ERR_PTR(-ENAMETOOLONG); } /* - * nfs_follow_mountpoint - handle crossing a mountpoint on the server - * @dentry - dentry of mountpoint - * @nd - nameidata info + * nfs_d_automount - Handle crossing a mountpoint on the server + * @path - The mountpoint * * When we encounter a mountpoint on the server, we want to set up * a mountpoint on the client too, to prevent inode numbers from @@ -100,87 +108,65 @@ Elong: * situation, and that different filesystems may want to use * different security flavours. */ -static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) +struct vfsmount *nfs_d_automount(struct path *path) { struct vfsmount *mnt; - struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs_server *server = NFS_SERVER(path->dentry->d_inode); struct dentry *parent; struct nfs_fh *fh = NULL; struct nfs_fattr *fattr = NULL; int err; - dprintk("--> nfs_follow_mountpoint()\n"); + dprintk("--> nfs_d_automount()\n"); - err = -ESTALE; - if (IS_ROOT(dentry)) - goto out_err; + mnt = ERR_PTR(-ESTALE); + if (IS_ROOT(path->dentry)) + goto out_nofree; - err = -ENOMEM; + mnt = ERR_PTR(-ENOMEM); fh = nfs_alloc_fhandle(); fattr = nfs_alloc_fattr(); if (fh == NULL || fattr == NULL) - goto out_err; + goto out; dprintk("%s: enter\n", __func__); - dput(nd->path.dentry); - nd->path.dentry = dget(dentry); - /* Look it up again */ - parent = dget_parent(nd->path.dentry); + /* Look it up again to get its attributes */ + parent = dget_parent(path->dentry); err = server->nfs_client->rpc_ops->lookup(parent->d_inode, - &nd->path.dentry->d_name, + &path->dentry->d_name, fh, fattr); dput(parent); - if (err != 0) - goto out_err; + if (err != 0) { + mnt = ERR_PTR(err); + goto out; + } if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) - mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry); + mnt = nfs_do_refmount(path->mnt, path->dentry); else - mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, fh, - fattr); - err = PTR_ERR(mnt); + mnt = nfs_do_submount(path->mnt, path->dentry, fh, fattr); if (IS_ERR(mnt)) - goto out_err; + goto out; - mntget(mnt); - err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, - &nfs_automount_list); - if (err < 0) { - mntput(mnt); - if (err == -EBUSY) - goto out_follow; - goto out_err; - } - path_put(&nd->path); - nd->path.mnt = mnt; - nd->path.dentry = dget(mnt->mnt_root); + dprintk("%s: done, success\n", __func__); + mntget(mnt); /* prevent immediate expiration */ + mnt_set_expiry(mnt, &nfs_automount_list); schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); + out: nfs_free_fattr(fattr); nfs_free_fhandle(fh); - dprintk("%s: done, returned %d\n", __func__, err); - - dprintk("<-- nfs_follow_mountpoint() = %d\n", err); - return ERR_PTR(err); -out_err: - path_put(&nd->path); - goto out; -out_follow: - while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path)) - ; - err = 0; - goto out; +out_nofree: + dprintk("<-- nfs_follow_mountpoint() = %p\n", mnt); + return mnt; } const struct inode_operations nfs_mountpoint_inode_operations = { - .follow_link = nfs_follow_mountpoint, .getattr = nfs_getattr, }; const struct inode_operations nfs_referral_inode_operations = { - .follow_link = nfs_follow_mountpoint, }; static void nfs_expire_automounts(struct work_struct *work) diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index db8846a0e82..792cb13a430 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -61,609 +61,1008 @@ #define NFS_readdirres_sz (1) #define NFS_statfsres_sz (1+NFS_info_sz) + +/* + * While encoding arguments, set up the reply buffer in advance to + * receive reply data directly into the page cache. + */ +static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages, + unsigned int base, unsigned int len, + unsigned int bufsize) +{ + struct rpc_auth *auth = req->rq_cred->cr_auth; + unsigned int replen; + + replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize; + xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len); +} + +/* + * Handle decode buffer overflows out-of-line. + */ +static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) +{ + dprintk("NFS: %s prematurely hit the end of our receive buffer. " + "Remaining buffer length is %tu words.\n", + func, xdr->end - xdr->p); +} + + +/* + * Encode/decode NFSv2 basic data types + * + * Basic NFSv2 data types are defined in section 2.3 of RFC 1094: + * "NFS: Network File System Protocol Specification". + * + * Not all basic data types have their own encoding and decoding + * functions. For run-time efficiency, some data types are encoded + * or decoded inline. + */ + +/* + * typedef opaque nfsdata<>; + */ +static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result) +{ + u32 recvd, count; + size_t hdrlen; + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + count = be32_to_cpup(p); + hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; + recvd = xdr->buf->len - hdrlen; + if (unlikely(count > recvd)) + goto out_cheating; +out: + xdr_read_pages(xdr, count); + result->eof = 0; /* NFSv2 does not pass EOF flag on the wire. */ + result->count = count; + return count; +out_cheating: + dprintk("NFS: server cheating in read result: " + "count %u > recvd %u\n", count, recvd); + count = recvd; + goto out; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +/* + * enum stat { + * NFS_OK = 0, + * NFSERR_PERM = 1, + * NFSERR_NOENT = 2, + * NFSERR_IO = 5, + * NFSERR_NXIO = 6, + * NFSERR_ACCES = 13, + * NFSERR_EXIST = 17, + * NFSERR_NODEV = 19, + * NFSERR_NOTDIR = 20, + * NFSERR_ISDIR = 21, + * NFSERR_FBIG = 27, + * NFSERR_NOSPC = 28, + * NFSERR_ROFS = 30, + * NFSERR_NAMETOOLONG = 63, + * NFSERR_NOTEMPTY = 66, + * NFSERR_DQUOT = 69, + * NFSERR_STALE = 70, + * NFSERR_WFLUSH = 99 + * }; + */ +static int decode_stat(struct xdr_stream *xdr, enum nfs_stat *status) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + *status = be32_to_cpup(p); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + /* - * Common NFS XDR functions as inlines + * 2.3.2. ftype + * + * enum ftype { + * NFNON = 0, + * NFREG = 1, + * NFDIR = 2, + * NFBLK = 3, + * NFCHR = 4, + * NFLNK = 5 + * }; + * */ -static inline __be32 * -xdr_encode_fhandle(__be32 *p, const struct nfs_fh *fhandle) +static __be32 *xdr_decode_ftype(__be32 *p, u32 *type) { - memcpy(p, fhandle->data, NFS2_FHSIZE); - return p + XDR_QUADLEN(NFS2_FHSIZE); + *type = be32_to_cpup(p++); + if (unlikely(*type > NF2FIFO)) + *type = NFBAD; + return p; } -static inline __be32 * -xdr_decode_fhandle(__be32 *p, struct nfs_fh *fhandle) +/* + * 2.3.3. fhandle + * + * typedef opaque fhandle[FHSIZE]; + */ +static void encode_fhandle(struct xdr_stream *xdr, const struct nfs_fh *fh) { - /* NFSv2 handles have a fixed length */ - fhandle->size = NFS2_FHSIZE; - memcpy(fhandle->data, p, NFS2_FHSIZE); - return p + XDR_QUADLEN(NFS2_FHSIZE); + __be32 *p; + + BUG_ON(fh->size != NFS2_FHSIZE); + p = xdr_reserve_space(xdr, NFS2_FHSIZE); + memcpy(p, fh->data, NFS2_FHSIZE); } -static inline __be32* -xdr_encode_time(__be32 *p, struct timespec *timep) +static int decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh) { - *p++ = htonl(timep->tv_sec); - /* Convert nanoseconds into microseconds */ - *p++ = htonl(timep->tv_nsec ? timep->tv_nsec / 1000 : 0); + __be32 *p; + + p = xdr_inline_decode(xdr, NFS2_FHSIZE); + if (unlikely(p == NULL)) + goto out_overflow; + fh->size = NFS2_FHSIZE; + memcpy(fh->data, p, NFS2_FHSIZE); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +/* + * 2.3.4. timeval + * + * struct timeval { + * unsigned int seconds; + * unsigned int useconds; + * }; + */ +static __be32 *xdr_encode_time(__be32 *p, const struct timespec *timep) +{ + *p++ = cpu_to_be32(timep->tv_sec); + if (timep->tv_nsec != 0) + *p++ = cpu_to_be32(timep->tv_nsec / NSEC_PER_USEC); + else + *p++ = cpu_to_be32(0); return p; } -static inline __be32* -xdr_encode_current_server_time(__be32 *p, struct timespec *timep) +/* + * Passing the invalid value useconds=1000000 is a Sun convention for + * "set to current server time". It's needed to make permissions checks + * for the "touch" program across v2 mounts to Solaris and Irix servers + * work correctly. See description of sattr in section 6.1 of "NFS + * Illustrated" by Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5. + */ +static __be32 *xdr_encode_current_server_time(__be32 *p, + const struct timespec *timep) { - /* - * Passing the invalid value useconds=1000000 is a - * Sun convention for "set to current server time". - * It's needed to make permissions checks for the - * "touch" program across v2 mounts to Solaris and - * Irix boxes work correctly. See description of - * sattr in section 6.1 of "NFS Illustrated" by - * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5 - */ - *p++ = htonl(timep->tv_sec); - *p++ = htonl(1000000); + *p++ = cpu_to_be32(timep->tv_sec); + *p++ = cpu_to_be32(1000000); return p; } -static inline __be32* -xdr_decode_time(__be32 *p, struct timespec *timep) +static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep) { - timep->tv_sec = ntohl(*p++); - /* Convert microseconds into nanoseconds */ - timep->tv_nsec = ntohl(*p++) * 1000; + timep->tv_sec = be32_to_cpup(p++); + timep->tv_nsec = be32_to_cpup(p++) * NSEC_PER_USEC; return p; } -static __be32 * -xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) +/* + * 2.3.5. fattr + * + * struct fattr { + * ftype type; + * unsigned int mode; + * unsigned int nlink; + * unsigned int uid; + * unsigned int gid; + * unsigned int size; + * unsigned int blocksize; + * unsigned int rdev; + * unsigned int blocks; + * unsigned int fsid; + * unsigned int fileid; + * timeval atime; + * timeval mtime; + * timeval ctime; + * }; + * + */ +static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr) { u32 rdev, type; - type = ntohl(*p++); - fattr->mode = ntohl(*p++); - fattr->nlink = ntohl(*p++); - fattr->uid = ntohl(*p++); - fattr->gid = ntohl(*p++); - fattr->size = ntohl(*p++); - fattr->du.nfs2.blocksize = ntohl(*p++); - rdev = ntohl(*p++); - fattr->du.nfs2.blocks = ntohl(*p++); - fattr->fsid.major = ntohl(*p++); - fattr->fsid.minor = 0; - fattr->fileid = ntohl(*p++); - p = xdr_decode_time(p, &fattr->atime); - p = xdr_decode_time(p, &fattr->mtime); - p = xdr_decode_time(p, &fattr->ctime); + __be32 *p; + + p = xdr_inline_decode(xdr, NFS_fattr_sz << 2); + if (unlikely(p == NULL)) + goto out_overflow; + fattr->valid |= NFS_ATTR_FATTR_V2; + + p = xdr_decode_ftype(p, &type); + + fattr->mode = be32_to_cpup(p++); + fattr->nlink = be32_to_cpup(p++); + fattr->uid = be32_to_cpup(p++); + fattr->gid = be32_to_cpup(p++); + fattr->size = be32_to_cpup(p++); + fattr->du.nfs2.blocksize = be32_to_cpup(p++); + + rdev = be32_to_cpup(p++); fattr->rdev = new_decode_dev(rdev); - if (type == NFCHR && rdev == NFS2_FIFO_DEV) { + if (type == (u32)NFCHR && rdev == (u32)NFS2_FIFO_DEV) { fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO; fattr->rdev = 0; } - return p; + + fattr->du.nfs2.blocks = be32_to_cpup(p++); + fattr->fsid.major = be32_to_cpup(p++); + fattr->fsid.minor = 0; + fattr->fileid = be32_to_cpup(p++); + + p = xdr_decode_time(p, &fattr->atime); + p = xdr_decode_time(p, &fattr->mtime); + xdr_decode_time(p, &fattr->ctime); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } -static inline __be32 * -xdr_encode_sattr(__be32 *p, struct iattr *attr) -{ - const __be32 not_set = __constant_htonl(0xFFFFFFFF); +/* + * 2.3.6. sattr + * + * struct sattr { + * unsigned int mode; + * unsigned int uid; + * unsigned int gid; + * unsigned int size; + * timeval atime; + * timeval mtime; + * }; + */ - *p++ = (attr->ia_valid & ATTR_MODE) ? htonl(attr->ia_mode) : not_set; - *p++ = (attr->ia_valid & ATTR_UID) ? htonl(attr->ia_uid) : not_set; - *p++ = (attr->ia_valid & ATTR_GID) ? htonl(attr->ia_gid) : not_set; - *p++ = (attr->ia_valid & ATTR_SIZE) ? htonl(attr->ia_size) : not_set; +#define NFS2_SATTR_NOT_SET (0xffffffff) + +static __be32 *xdr_time_not_set(__be32 *p) +{ + *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); + *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); + return p; +} - if (attr->ia_valid & ATTR_ATIME_SET) { +static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, NFS_sattr_sz << 2); + + if (attr->ia_valid & ATTR_MODE) + *p++ = cpu_to_be32(attr->ia_mode); + else + *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); + if (attr->ia_valid & ATTR_UID) + *p++ = cpu_to_be32(attr->ia_uid); + else + *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); + if (attr->ia_valid & ATTR_GID) + *p++ = cpu_to_be32(attr->ia_gid); + else + *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); + if (attr->ia_valid & ATTR_SIZE) + *p++ = cpu_to_be32((u32)attr->ia_size); + else + *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); + + if (attr->ia_valid & ATTR_ATIME_SET) p = xdr_encode_time(p, &attr->ia_atime); - } else if (attr->ia_valid & ATTR_ATIME) { + else if (attr->ia_valid & ATTR_ATIME) p = xdr_encode_current_server_time(p, &attr->ia_atime); - } else { - *p++ = not_set; - *p++ = not_set; - } - - if (attr->ia_valid & ATTR_MTIME_SET) { - p = xdr_encode_time(p, &attr->ia_mtime); - } else if (attr->ia_valid & ATTR_MTIME) { - p = xdr_encode_current_server_time(p, &attr->ia_mtime); - } else { - *p++ = not_set; - *p++ = not_set; - } - return p; + else + p = xdr_time_not_set(p); + if (attr->ia_valid & ATTR_MTIME_SET) + xdr_encode_time(p, &attr->ia_mtime); + else if (attr->ia_valid & ATTR_MTIME) + xdr_encode_current_server_time(p, &attr->ia_mtime); + else + xdr_time_not_set(p); } /* - * NFS encode functions - */ -/* - * Encode file handle argument - * GETATTR, READLINK, STATFS + * 2.3.7. filename + * + * typedef string filename<MAXNAMLEN>; */ -static int -nfs_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh) +static void encode_filename(struct xdr_stream *xdr, + const char *name, u32 length) { - p = xdr_encode_fhandle(p, fh); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + __be32 *p; + + BUG_ON(length > NFS2_MAXNAMLEN); + p = xdr_reserve_space(xdr, 4 + length); + xdr_encode_opaque(p, name, length); +} + +static int decode_filename_inline(struct xdr_stream *xdr, + const char **name, u32 *length) +{ + __be32 *p; + u32 count; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + count = be32_to_cpup(p); + if (count > NFS3_MAXNAMLEN) + goto out_nametoolong; + p = xdr_inline_decode(xdr, count); + if (unlikely(p == NULL)) + goto out_overflow; + *name = (const char *)p; + *length = count; return 0; +out_nametoolong: + dprintk("NFS: returned filename too long: %u\n", count); + return -ENAMETOOLONG; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } /* - * Encode SETATTR arguments + * 2.3.8. path + * + * typedef string path<MAXPATHLEN>; */ -static int -nfs_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs_sattrargs *args) +static void encode_path(struct xdr_stream *xdr, struct page **pages, u32 length) +{ + __be32 *p; + + BUG_ON(length > NFS2_MAXPATHLEN); + p = xdr_reserve_space(xdr, 4); + *p = cpu_to_be32(length); + xdr_write_pages(xdr, pages, 0, length); +} + +static int decode_path(struct xdr_stream *xdr) { - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_sattr(p, args->sattr); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + u32 length, recvd; + size_t hdrlen; + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + length = be32_to_cpup(p); + if (unlikely(length >= xdr->buf->page_len || length > NFS_MAXPATHLEN)) + goto out_size; + hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; + recvd = xdr->buf->len - hdrlen; + if (unlikely(length > recvd)) + goto out_cheating; + + xdr_read_pages(xdr, length); + xdr_terminate_string(xdr->buf, length); return 0; +out_size: + dprintk("NFS: returned pathname too long: %u\n", length); + return -ENAMETOOLONG; +out_cheating: + dprintk("NFS: server cheating in pathname result: " + "length %u > received %u\n", length, recvd); + return -EIO; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } /* - * Encode directory ops argument - * LOOKUP, RMDIR + * 2.3.9. attrstat + * + * union attrstat switch (stat status) { + * case NFS_OK: + * fattr attributes; + * default: + * void; + * }; */ -static int -nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args) +static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result) { - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_array(p, args->name, args->len); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - return 0; + enum nfs_stat status; + int error; + + error = decode_stat(xdr, &status); + if (unlikely(error)) + goto out; + if (status != NFS_OK) + goto out_default; + error = decode_fattr(xdr, result); +out: + return error; +out_default: + return nfs_stat_to_errno(status); } /* - * Encode REMOVE argument + * 2.3.10. diropargs + * + * struct diropargs { + * fhandle dir; + * filename name; + * }; */ -static int -nfs_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args) +static void encode_diropargs(struct xdr_stream *xdr, const struct nfs_fh *fh, + const char *name, u32 length) { - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_array(p, args->name.name, args->name.len); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - return 0; + encode_fhandle(xdr, fh); + encode_filename(xdr, name, length); } /* - * Arguments to a READ call. Since we read data directly into the page - * cache, we also set up the reply iovec here so that iov[1] points - * exactly to the page we want to fetch. + * 2.3.11. diropres + * + * union diropres switch (stat status) { + * case NFS_OK: + * struct { + * fhandle file; + * fattr attributes; + * } diropok; + * default: + * void; + * }; */ -static int -nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) +static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result) { - struct rpc_auth *auth = req->rq_cred->cr_auth; - unsigned int replen; - u32 offset = (u32)args->offset; - u32 count = args->count; - - p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(offset); - *p++ = htonl(count); - *p++ = htonl(count); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + int error; + + error = decode_fhandle(xdr, result->fh); + if (unlikely(error)) + goto out; + error = decode_fattr(xdr, result->fattr); +out: + return error; +} - /* Inline the page array */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, - args->pages, args->pgbase, count); - req->rq_rcv_buf.flags |= XDRBUF_READ; - return 0; +static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result) +{ + enum nfs_stat status; + int error; + + error = decode_stat(xdr, &status); + if (unlikely(error)) + goto out; + if (status != NFS_OK) + goto out_default; + error = decode_diropok(xdr, result); +out: + return error; +out_default: + return nfs_stat_to_errno(status); } + /* - * Decode READ reply + * NFSv2 XDR encode functions + * + * NFSv2 argument types are defined in section 2.2 of RFC 1094: + * "NFS: Network File System Protocol Specification". */ -static int -nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) -{ - struct kvec *iov = req->rq_rcv_buf.head; - size_t hdrlen; - u32 count, recvd; - int status; - - if ((status = ntohl(*p++))) - return nfs_stat_to_errno(status); - p = xdr_decode_fattr(p, res->fattr); - - count = ntohl(*p++); - res->eof = 0; - hdrlen = (u8 *) p - (u8 *) iov->iov_base; - if (iov->iov_len < hdrlen) { - dprintk("NFS: READ reply header overflowed:" - "length %Zu > %Zu\n", hdrlen, iov->iov_len); - return -errno_NFSERR_IO; - } else if (iov->iov_len != hdrlen) { - dprintk("NFS: READ header is short. iovec will be shifted.\n"); - xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen); - } - recvd = req->rq_rcv_buf.len - hdrlen; - if (count > recvd) { - dprintk("NFS: server cheating in read reply: " - "count %u > recvd %u\n", count, recvd); - count = recvd; - } +static void nfs2_xdr_enc_fhandle(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_fh *fh) +{ + encode_fhandle(xdr, fh); +} - dprintk("RPC: readres OK count %u\n", count); - if (count < res->count) - res->count = count; +/* + * 2.2.3. sattrargs + * + * struct sattrargs { + * fhandle file; + * sattr attributes; + * }; + */ +static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_sattrargs *args) +{ + encode_fhandle(xdr, args->fh); + encode_sattr(xdr, args->sattr); +} - return count; +static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_diropargs *args) +{ + encode_diropargs(xdr, args->fh, args->name, args->len); } +static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_readlinkargs *args) +{ + encode_fhandle(xdr, args->fh); + prepare_reply_buffer(req, args->pages, args->pgbase, + args->pglen, NFS_readlinkres_sz); +} /* - * Write arguments. Splice the buffer to be written into the iovec. + * 2.2.7. readargs + * + * struct readargs { + * fhandle file; + * unsigned offset; + * unsigned count; + * unsigned totalcount; + * }; */ -static int -nfs_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) +static void encode_readargs(struct xdr_stream *xdr, + const struct nfs_readargs *args) { - struct xdr_buf *sndbuf = &req->rq_snd_buf; - u32 offset = (u32)args->offset; + u32 offset = args->offset; u32 count = args->count; + __be32 *p; - p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(offset); - *p++ = htonl(offset); - *p++ = htonl(count); - *p++ = htonl(count); - sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); + encode_fhandle(xdr, args->fh); - /* Copy the page array */ - xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); - sndbuf->flags |= XDRBUF_WRITE; - return 0; + p = xdr_reserve_space(xdr, 4 + 4 + 4); + *p++ = cpu_to_be32(offset); + *p++ = cpu_to_be32(count); + *p = cpu_to_be32(count); } -/* - * Encode create arguments - * CREATE, MKDIR - */ -static int -nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args) +static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_readargs *args) { - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_array(p, args->name, args->len); - p = xdr_encode_sattr(p, args->sattr); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - return 0; + encode_readargs(xdr, args); + prepare_reply_buffer(req, args->pages, args->pgbase, + args->count, NFS_readres_sz); + req->rq_rcv_buf.flags |= XDRBUF_READ; } /* - * Encode RENAME arguments + * 2.2.9. writeargs + * + * struct writeargs { + * fhandle file; + * unsigned beginoffset; + * unsigned offset; + * unsigned totalcount; + * nfsdata data; + * }; */ -static int -nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args) +static void encode_writeargs(struct xdr_stream *xdr, + const struct nfs_writeargs *args) { - p = xdr_encode_fhandle(p, args->fromfh); - p = xdr_encode_array(p, args->fromname, args->fromlen); - p = xdr_encode_fhandle(p, args->tofh); - p = xdr_encode_array(p, args->toname, args->tolen); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - return 0; + u32 offset = args->offset; + u32 count = args->count; + __be32 *p; + + encode_fhandle(xdr, args->fh); + + p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4); + *p++ = cpu_to_be32(offset); + *p++ = cpu_to_be32(offset); + *p++ = cpu_to_be32(count); + + /* nfsdata */ + *p = cpu_to_be32(count); + xdr_write_pages(xdr, args->pages, args->pgbase, count); } -/* - * Encode LINK arguments - */ -static int -nfs_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs_linkargs *args) +static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_writeargs *args) { - p = xdr_encode_fhandle(p, args->fromfh); - p = xdr_encode_fhandle(p, args->tofh); - p = xdr_encode_array(p, args->toname, args->tolen); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - return 0; + encode_writeargs(xdr, args); + xdr->buf->flags |= XDRBUF_WRITE; } /* - * Encode SYMLINK arguments + * 2.2.10. createargs + * + * struct createargs { + * diropargs where; + * sattr attributes; + * }; */ -static int -nfs_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_symlinkargs *args) +static void nfs2_xdr_enc_createargs(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_createargs *args) { - struct xdr_buf *sndbuf = &req->rq_snd_buf; - size_t pad; + encode_diropargs(xdr, args->fh, args->name, args->len); + encode_sattr(xdr, args->sattr); +} - p = xdr_encode_fhandle(p, args->fromfh); - p = xdr_encode_array(p, args->fromname, args->fromlen); - *p++ = htonl(args->pathlen); - sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); +static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_removeargs *args) +{ + encode_diropargs(xdr, args->fh, args->name.name, args->name.len); +} - xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen); +/* + * 2.2.12. renameargs + * + * struct renameargs { + * diropargs from; + * diropargs to; + * }; + */ +static void nfs2_xdr_enc_renameargs(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_renameargs *args) +{ + const struct qstr *old = args->old_name; + const struct qstr *new = args->new_name; - /* - * xdr_encode_pages may have added a few bytes to ensure the - * pathname ends on a 4-byte boundary. Start encoding the - * attributes after the pad bytes. - */ - pad = sndbuf->tail->iov_len; - if (pad > 0) - p++; - p = xdr_encode_sattr(p, args->sattr); - sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad; - return 0; + encode_diropargs(xdr, args->old_dir, old->name, old->len); + encode_diropargs(xdr, args->new_dir, new->name, new->len); } /* - * Encode arguments to readdir call + * 2.2.13. linkargs + * + * struct linkargs { + * fhandle from; + * diropargs to; + * }; */ -static int -nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) +static void nfs2_xdr_enc_linkargs(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_linkargs *args) { - struct rpc_auth *auth = req->rq_cred->cr_auth; - unsigned int replen; - u32 count = args->count; - - p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->cookie); - *p++ = htonl(count); /* see above */ - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + encode_fhandle(xdr, args->fromfh); + encode_diropargs(xdr, args->tofh, args->toname, args->tolen); +} - /* Inline the page array */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); - return 0; +/* + * 2.2.14. symlinkargs + * + * struct symlinkargs { + * diropargs from; + * path to; + * sattr attributes; + * }; + */ +static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_symlinkargs *args) +{ + encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen); + encode_path(xdr, args->pages, args->pathlen); + encode_sattr(xdr, args->sattr); } /* - * Decode the result of a readdir call. - * We're not really decoding anymore, we just leave the buffer untouched - * and only check that it is syntactically correct. - * The real decoding happens in nfs_decode_entry below, called directly - * from nfs_readdir for each entry. + * 2.2.17. readdirargs + * + * struct readdirargs { + * fhandle dir; + * nfscookie cookie; + * unsigned count; + * }; */ -static int -nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) +static void encode_readdirargs(struct xdr_stream *xdr, + const struct nfs_readdirargs *args) { - struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct kvec *iov = rcvbuf->head; - struct page **page; - size_t hdrlen; - unsigned int pglen, recvd; - u32 len; - int status, nr = 0; - __be32 *end, *entry, *kaddr; - - if ((status = ntohl(*p++))) - return nfs_stat_to_errno(status); - - hdrlen = (u8 *) p - (u8 *) iov->iov_base; - if (iov->iov_len < hdrlen) { - dprintk("NFS: READDIR reply header overflowed:" - "length %Zu > %Zu\n", hdrlen, iov->iov_len); - return -errno_NFSERR_IO; - } else if (iov->iov_len != hdrlen) { - dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); - xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); - } + __be32 *p; - pglen = rcvbuf->page_len; - recvd = rcvbuf->len - hdrlen; - if (pglen > recvd) - pglen = recvd; - page = rcvbuf->pages; - kaddr = p = kmap_atomic(*page, KM_USER0); - end = (__be32 *)((char *)p + pglen); - entry = p; - - /* Make sure the packet actually has a value_follows and EOF entry */ - if ((entry + 1) > end) - goto short_pkt; - - for (; *p++; nr++) { - if (p + 2 > end) - goto short_pkt; - p++; /* fileid */ - len = ntohl(*p++); - p += XDR_QUADLEN(len) + 1; /* name plus cookie */ - if (len > NFS2_MAXNAMLEN) { - dprintk("NFS: giant filename in readdir (len 0x%x)!\n", - len); - goto err_unmap; - } - if (p + 2 > end) - goto short_pkt; - entry = p; - } + encode_fhandle(xdr, args->fh); - /* - * Apparently some server sends responses that are a valid size, but - * contain no entries, and have value_follows==0 and EOF==0. For - * those, just set the EOF marker. - */ - if (!nr && entry[1] == 0) { - dprintk("NFS: readdir reply truncated!\n"); - entry[1] = 1; - } - out: - kunmap_atomic(kaddr, KM_USER0); - return nr; - short_pkt: - /* - * When we get a short packet there are 2 possibilities. We can - * return an error, or fix up the response to look like a valid - * response and return what we have so far. If there are no - * entries and the packet was short, then return -EIO. If there - * are valid entries in the response, return them and pretend that - * the call was successful, but incomplete. The caller can retry the - * readdir starting at the last cookie. - */ - entry[0] = entry[1] = 0; - if (!nr) - nr = -errno_NFSERR_IO; - goto out; -err_unmap: - nr = -errno_NFSERR_IO; - goto out; + p = xdr_reserve_space(xdr, 4 + 4); + *p++ = cpu_to_be32(args->cookie); + *p = cpu_to_be32(args->count); } -__be32 * -nfs_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) +static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_readdirargs *args) { - if (!*p++) { - if (!*p) - return ERR_PTR(-EAGAIN); - entry->eof = 1; - return ERR_PTR(-EBADCOOKIE); - } - - entry->ino = ntohl(*p++); - entry->len = ntohl(*p++); - entry->name = (const char *) p; - p += XDR_QUADLEN(entry->len); - entry->prev_cookie = entry->cookie; - entry->cookie = ntohl(*p++); - entry->eof = !p[0] && p[1]; - - return p; + encode_readdirargs(xdr, args); + prepare_reply_buffer(req, args->pages, 0, + args->count, NFS_readdirres_sz); } /* - * NFS XDR decode functions - */ -/* - * Decode simple status reply + * NFSv2 XDR decode functions + * + * NFSv2 result types are defined in section 2.2 of RFC 1094: + * "NFS: Network File System Protocol Specification". */ -static int -nfs_xdr_stat(struct rpc_rqst *req, __be32 *p, void *dummy) + +static int nfs2_xdr_dec_stat(struct rpc_rqst *req, struct xdr_stream *xdr, + void *__unused) { - int status; + enum nfs_stat status; + int error; + + error = decode_stat(xdr, &status); + if (unlikely(error)) + goto out; + if (status != NFS_OK) + goto out_default; +out: + return error; +out_default: + return nfs_stat_to_errno(status); +} - if ((status = ntohl(*p++)) != 0) - status = nfs_stat_to_errno(status); - return status; +static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_fattr *result) +{ + return decode_attrstat(xdr, result); +} + +static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_diropok *result) +{ + return decode_diropres(xdr, result); } /* - * Decode attrstat reply - * GETATTR, SETATTR, WRITE + * 2.2.6. readlinkres + * + * union readlinkres switch (stat status) { + * case NFS_OK: + * path data; + * default: + * void; + * }; */ -static int -nfs_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) +static int nfs2_xdr_dec_readlinkres(struct rpc_rqst *req, + struct xdr_stream *xdr, void *__unused) { - int status; - - if ((status = ntohl(*p++))) - return nfs_stat_to_errno(status); - xdr_decode_fattr(p, fattr); - return 0; + enum nfs_stat status; + int error; + + error = decode_stat(xdr, &status); + if (unlikely(error)) + goto out; + if (status != NFS_OK) + goto out_default; + error = decode_path(xdr); +out: + return error; +out_default: + return nfs_stat_to_errno(status); } /* - * Decode diropres reply - * LOOKUP, CREATE, MKDIR + * 2.2.7. readres + * + * union readres switch (stat status) { + * case NFS_OK: + * fattr attributes; + * nfsdata data; + * default: + * void; + * }; */ -static int -nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res) +static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_readres *result) { - int status; + enum nfs_stat status; + int error; + + error = decode_stat(xdr, &status); + if (unlikely(error)) + goto out; + if (status != NFS_OK) + goto out_default; + error = decode_fattr(xdr, result->fattr); + if (unlikely(error)) + goto out; + error = decode_nfsdata(xdr, result); +out: + return error; +out_default: + return nfs_stat_to_errno(status); +} - if ((status = ntohl(*p++))) - return nfs_stat_to_errno(status); - p = xdr_decode_fhandle(p, res->fh); - xdr_decode_fattr(p, res->fattr); - return 0; +static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_writeres *result) +{ + /* All NFSv2 writes are "file sync" writes */ + result->verf->committed = NFS_FILE_SYNC; + return decode_attrstat(xdr, result->fattr); } -/* - * Encode READLINK args +/** + * nfs2_decode_dirent - Decode a single NFSv2 directory entry stored in + * the local page cache. + * @xdr: XDR stream where entry resides + * @entry: buffer to fill in with entry data + * @plus: boolean indicating whether this should be a readdirplus entry + * + * Returns zero if successful, otherwise a negative errno value is + * returned. + * + * This function is not invoked during READDIR reply decoding, but + * rather whenever an application invokes the getdents(2) system call + * on a directory already in our cache. + * + * 2.2.17. entry + * + * struct entry { + * unsigned fileid; + * filename name; + * nfscookie cookie; + * entry *nextentry; + * }; */ -static int -nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args) +int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, + int plus) { - struct rpc_auth *auth = req->rq_cred->cr_auth; - unsigned int replen; + __be32 *p; + int error; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + if (*p++ == xdr_zero) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + if (*p++ == xdr_zero) + return -EAGAIN; + entry->eof = 1; + return -EBADCOOKIE; + } + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + entry->ino = be32_to_cpup(p); + + error = decode_filename_inline(xdr, &entry->name, &entry->len); + if (unlikely(error)) + return error; + + /* + * The type (size and byte order) of nfscookie isn't defined in + * RFC 1094. This implementation assumes that it's an XDR uint32. + */ + entry->prev_cookie = entry->cookie; + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + entry->cookie = be32_to_cpup(p); - p = xdr_encode_fhandle(p, args->fh); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + entry->d_type = DT_UNKNOWN; - /* Inline the page array */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen); return 0; + +out_overflow: + print_overflow_msg(__func__, xdr); + return -EAGAIN; } /* - * Decode READLINK reply + * 2.2.17. readdirres + * + * union readdirres switch (stat status) { + * case NFS_OK: + * struct { + * entry *entries; + * bool eof; + * } readdirok; + * default: + * void; + * }; + * + * Read the directory contents into the page cache, but don't + * touch them. The actual decoding is done by nfs2_decode_dirent() + * during subsequent nfs_readdir() calls. */ -static int -nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) +static int decode_readdirok(struct xdr_stream *xdr) { - struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct kvec *iov = rcvbuf->head; + u32 recvd, pglen; size_t hdrlen; - u32 len, recvd; - char *kaddr; - int status; - - if ((status = ntohl(*p++))) - return nfs_stat_to_errno(status); - /* Convert length of symlink */ - len = ntohl(*p++); - if (len >= rcvbuf->page_len) { - dprintk("nfs: server returned giant symlink!\n"); - return -ENAMETOOLONG; - } - hdrlen = (u8 *) p - (u8 *) iov->iov_base; - if (iov->iov_len < hdrlen) { - dprintk("NFS: READLINK reply header overflowed:" - "length %Zu > %Zu\n", hdrlen, iov->iov_len); - return -errno_NFSERR_IO; - } else if (iov->iov_len != hdrlen) { - dprintk("NFS: READLINK header is short. iovec will be shifted.\n"); - xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); - } - recvd = req->rq_rcv_buf.len - hdrlen; - if (recvd < len) { - dprintk("NFS: server cheating in readlink reply: " - "count %u > recvd %u\n", len, recvd); - return -EIO; - } - /* NULL terminate the string we got */ - kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); - kaddr[len+rcvbuf->page_base] = '\0'; - kunmap_atomic(kaddr, KM_USER0); - return 0; + pglen = xdr->buf->page_len; + hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; + recvd = xdr->buf->len - hdrlen; + if (unlikely(pglen > recvd)) + goto out_cheating; +out: + xdr_read_pages(xdr, pglen); + return pglen; +out_cheating: + dprintk("NFS: server cheating in readdir result: " + "pglen %u > recvd %u\n", pglen, recvd); + pglen = recvd; + goto out; } -/* - * Decode WRITE reply - */ -static int -nfs_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res) +static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req, + struct xdr_stream *xdr, void *__unused) { - res->verf->committed = NFS_FILE_SYNC; - return nfs_xdr_attrstat(req, p, res->fattr); + enum nfs_stat status; + int error; + + error = decode_stat(xdr, &status); + if (unlikely(error)) + goto out; + if (status != NFS_OK) + goto out_default; + error = decode_readdirok(xdr); +out: + return error; +out_default: + return nfs_stat_to_errno(status); } /* - * Decode STATFS reply + * 2.2.18. statfsres + * + * union statfsres (stat status) { + * case NFS_OK: + * struct { + * unsigned tsize; + * unsigned bsize; + * unsigned blocks; + * unsigned bfree; + * unsigned bavail; + * } info; + * default: + * void; + * }; */ -static int -nfs_xdr_statfsres(struct rpc_rqst *req, __be32 *p, struct nfs2_fsstat *res) +static int decode_info(struct xdr_stream *xdr, struct nfs2_fsstat *result) { - int status; - - if ((status = ntohl(*p++))) - return nfs_stat_to_errno(status); - - res->tsize = ntohl(*p++); - res->bsize = ntohl(*p++); - res->blocks = ntohl(*p++); - res->bfree = ntohl(*p++); - res->bavail = ntohl(*p++); + __be32 *p; + + p = xdr_inline_decode(xdr, NFS_info_sz << 2); + if (unlikely(p == NULL)) + goto out_overflow; + result->tsize = be32_to_cpup(p++); + result->bsize = be32_to_cpup(p++); + result->blocks = be32_to_cpup(p++); + result->bfree = be32_to_cpup(p++); + result->bavail = be32_to_cpup(p); return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs2_fsstat *result) +{ + enum nfs_stat status; + int error; + + error = decode_stat(xdr, &status); + if (unlikely(error)) + goto out; + if (status != NFS_OK) + goto out_default; + error = decode_info(xdr, result); +out: + return error; +out_default: + return nfs_stat_to_errno(status); } + /* * We need to translate between nfs status return values and * the local errno values which may not be the same. */ -static struct { +static const struct { int stat; int errno; } nfs_errtbl[] = { @@ -703,28 +1102,30 @@ static struct { { -1, -EIO } }; -/* - * Convert an NFS error code to a local one. - * This one is used jointly by NFSv2 and NFSv3. +/** + * nfs_stat_to_errno - convert an NFS status code to a local errno + * @status: NFS status code to convert + * + * Returns a local errno value, or -EIO if the NFS status code is + * not recognized. This function is used jointly by NFSv2 and NFSv3. */ -int -nfs_stat_to_errno(int stat) +int nfs_stat_to_errno(enum nfs_stat status) { int i; for (i = 0; nfs_errtbl[i].stat != -1; i++) { - if (nfs_errtbl[i].stat == stat) + if (nfs_errtbl[i].stat == (int)status) return nfs_errtbl[i].errno; } - dprintk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat); + dprintk("NFS: Unrecognized nfs status value: %u\n", status); return nfs_errtbl[i].errno; } #define PROC(proc, argtype, restype, timer) \ [NFSPROC_##proc] = { \ .p_proc = NFSPROC_##proc, \ - .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ - .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ + .p_encode = (kxdreproc_t)nfs2_xdr_enc_##argtype, \ + .p_decode = (kxdrdproc_t)nfs2_xdr_dec_##restype, \ .p_arglen = NFS_##argtype##_sz, \ .p_replen = NFS_##restype##_sz, \ .p_timer = timer, \ @@ -732,21 +1133,21 @@ nfs_stat_to_errno(int stat) .p_name = #proc, \ } struct rpc_procinfo nfs_procedures[] = { - PROC(GETATTR, fhandle, attrstat, 1), - PROC(SETATTR, sattrargs, attrstat, 0), - PROC(LOOKUP, diropargs, diropres, 2), - PROC(READLINK, readlinkargs, readlinkres, 3), - PROC(READ, readargs, readres, 3), - PROC(WRITE, writeargs, writeres, 4), - PROC(CREATE, createargs, diropres, 0), - PROC(REMOVE, removeargs, stat, 0), - PROC(RENAME, renameargs, stat, 0), - PROC(LINK, linkargs, stat, 0), - PROC(SYMLINK, symlinkargs, stat, 0), - PROC(MKDIR, createargs, diropres, 0), - PROC(RMDIR, diropargs, stat, 0), - PROC(READDIR, readdirargs, readdirres, 3), - PROC(STATFS, fhandle, statfsres, 0), + PROC(GETATTR, fhandle, attrstat, 1), + PROC(SETATTR, sattrargs, attrstat, 0), + PROC(LOOKUP, diropargs, diropres, 2), + PROC(READLINK, readlinkargs, readlinkres, 3), + PROC(READ, readargs, readres, 3), + PROC(WRITE, writeargs, writeres, 4), + PROC(CREATE, createargs, diropres, 0), + PROC(REMOVE, removeargs, stat, 0), + PROC(RENAME, renameargs, stat, 0), + PROC(LINK, linkargs, stat, 0), + PROC(SYMLINK, symlinkargs, stat, 0), + PROC(MKDIR, createargs, diropres, 0), + PROC(RMDIR, diropargs, stat, 0), + PROC(READDIR, readdirargs, readdirres, 3), + PROC(STATFS, fhandle, statfsres, 0), }; struct rpc_version nfs_version2 = { diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9f88c5f4c7e..27434277165 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -311,8 +311,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, if (!nfs_server_capable(inode, NFS_CAP_ACLS)) goto out; - /* We are doing this here, because XDR marshalling can only - return -ENOMEM. */ + /* We are doing this here because XDR marshalling does not + * return any results, it BUGs. */ status = -ENOSPC; if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) goto out; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index fabb4f2849a..ce939c062a5 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -313,7 +313,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data) */ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nameidata *nd) + int flags, struct nfs_open_context *ctx) { struct nfs3_createdata *data; mode_t mode = sattr->ia_mode; @@ -438,19 +438,38 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) return 1; } +static void +nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir) +{ + msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; +} + +static int +nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, + struct inode *new_dir) +{ + struct nfs_renameres *res; + + if (nfs3_async_handle_jukebox(task, old_dir)) + return 0; + res = task->tk_msg.rpc_resp; + + nfs_post_op_update_inode(old_dir, res->old_fattr); + nfs_post_op_update_inode(new_dir, res->new_fattr); + return 1; +} + static int nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, struct inode *new_dir, struct qstr *new_name) { - struct nfs3_renameargs arg = { - .fromfh = NFS_FH(old_dir), - .fromname = old_name->name, - .fromlen = old_name->len, - .tofh = NFS_FH(new_dir), - .toname = new_name->name, - .tolen = new_name->len + struct nfs_renameargs arg = { + .old_dir = NFS_FH(old_dir), + .old_name = old_name, + .new_dir = NFS_FH(new_dir), + .new_name = new_name, }; - struct nfs3_renameres res; + struct nfs_renameres res; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], .rpc_argp = &arg, @@ -460,17 +479,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); - res.fromattr = nfs_alloc_fattr(); - res.toattr = nfs_alloc_fattr(); - if (res.fromattr == NULL || res.toattr == NULL) + res.old_fattr = nfs_alloc_fattr(); + res.new_fattr = nfs_alloc_fattr(); + if (res.old_fattr == NULL || res.new_fattr == NULL) goto out; status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); - nfs_post_op_update_inode(old_dir, res.fromattr); - nfs_post_op_update_inode(new_dir, res.toattr); + nfs_post_op_update_inode(old_dir, res.old_fattr); + nfs_post_op_update_inode(new_dir, res.new_fattr); out: - nfs_free_fattr(res.toattr); - nfs_free_fattr(res.fromattr); + nfs_free_fattr(res.old_fattr); + nfs_free_fattr(res.new_fattr); dprintk("NFS reply rename: %d\n", status); return status; } @@ -611,7 +630,7 @@ out: */ static int nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, - u64 cookie, struct page *page, unsigned int count, int plus) + u64 cookie, struct page **pages, unsigned int count, int plus) { struct inode *dir = dentry->d_inode; __be32 *verf = NFS_COOKIEVERF(dir); @@ -621,7 +640,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, .verf = {verf[0], verf[1]}, .plus = plus, .count = count, - .pages = &page + .pages = pages }; struct nfs3_readdirres res = { .verf = verf, @@ -652,7 +671,8 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, nfs_free_fattr(res.dir_attr); out: - dprintk("NFS reply readdir: %d\n", status); + dprintk("NFS reply readdir%s: %d\n", + plus? "plus" : "", status); return status; } @@ -722,7 +742,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, dprintk("NFS call fsstat\n"); nfs_fattr_init(stat->fattr); status = rpc_call_sync(server->client, &msg, 0); - dprintk("NFS reply statfs: %d\n", status); + dprintk("NFS reply fsstat: %d\n", status); return status; } @@ -844,6 +864,8 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .unlink_setup = nfs3_proc_unlink_setup, .unlink_done = nfs3_proc_unlink_done, .rename = nfs3_proc_rename, + .rename_setup = nfs3_proc_rename_setup, + .rename_done = nfs3_proc_rename_done, .link = nfs3_proc_link, .symlink = nfs3_proc_symlink, .mkdir = nfs3_proc_mkdir, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 9769704f8ce..183c6b123d0 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -37,18 +37,16 @@ #define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2)) #define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2)) #define NFS3_fattr_sz (21) -#define NFS3_wcc_attr_sz (6) +#define NFS3_cookieverf_sz (NFS3_COOKIEVERFSIZE>>2) +#define NFS3_wcc_attr_sz (6) #define NFS3_pre_op_attr_sz (1+NFS3_wcc_attr_sz) #define NFS3_post_op_attr_sz (1+NFS3_fattr_sz) -#define NFS3_wcc_data_sz (NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz) -#define NFS3_fsstat_sz -#define NFS3_fsinfo_sz -#define NFS3_pathconf_sz -#define NFS3_entry_sz (NFS3_filename_sz+3) - -#define NFS3_sattrargs_sz (NFS3_fh_sz+NFS3_sattr_sz+3) +#define NFS3_wcc_data_sz (NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz) #define NFS3_diropargs_sz (NFS3_fh_sz+NFS3_filename_sz) -#define NFS3_removeargs_sz (NFS3_fh_sz+NFS3_filename_sz) + +#define NFS3_getattrargs_sz (NFS3_fh_sz) +#define NFS3_setattrargs_sz (NFS3_fh_sz+NFS3_sattr_sz+3) +#define NFS3_lookupargs_sz (NFS3_fh_sz+NFS3_filename_sz) #define NFS3_accessargs_sz (NFS3_fh_sz+1) #define NFS3_readlinkargs_sz (NFS3_fh_sz) #define NFS3_readargs_sz (NFS3_fh_sz+3) @@ -57,14 +55,16 @@ #define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) #define NFS3_symlinkargs_sz (NFS3_diropargs_sz+1+NFS3_sattr_sz) #define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz) +#define NFS3_removeargs_sz (NFS3_fh_sz+NFS3_filename_sz) #define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz) #define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz) -#define NFS3_readdirargs_sz (NFS3_fh_sz+2) +#define NFS3_readdirargs_sz (NFS3_fh_sz+NFS3_cookieverf_sz+3) +#define NFS3_readdirplusargs_sz (NFS3_fh_sz+NFS3_cookieverf_sz+4) #define NFS3_commitargs_sz (NFS3_fh_sz+3) -#define NFS3_attrstat_sz (1+NFS3_fattr_sz) -#define NFS3_wccstat_sz (1+NFS3_wcc_data_sz) -#define NFS3_removeres_sz (NFS3_wccstat_sz) +#define NFS3_getattrres_sz (1+NFS3_fattr_sz) +#define NFS3_setattrres_sz (1+NFS3_wcc_data_sz) +#define NFS3_removeres_sz (NFS3_setattrres_sz) #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz)) #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1) #define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1) @@ -101,1074 +101,2364 @@ static const umode_t nfs_type2fmt[] = { }; /* - * Common NFS XDR functions as inlines + * While encoding arguments, set up the reply buffer in advance to + * receive reply data directly into the page cache. */ -static inline __be32 * -xdr_encode_fhandle(__be32 *p, const struct nfs_fh *fh) +static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages, + unsigned int base, unsigned int len, + unsigned int bufsize) { - return xdr_encode_array(p, fh->data, fh->size); + struct rpc_auth *auth = req->rq_cred->cr_auth; + unsigned int replen; + + replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize; + xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len); } -static inline __be32 * -xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh) +/* + * Handle decode buffer overflows out-of-line. + */ +static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) { - if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) { - memcpy(fh->data, p, fh->size); - return p + XDR_QUADLEN(fh->size); - } - return NULL; + dprintk("NFS: %s prematurely hit the end of our receive buffer. " + "Remaining buffer length is %tu words.\n", + func, xdr->end - xdr->p); } + /* - * Encode/decode time. + * Encode/decode NFSv3 basic data types + * + * Basic NFSv3 data types are defined in section 2.5 of RFC 1813: + * "NFS Version 3 Protocol Specification". + * + * Not all basic data types have their own encoding and decoding + * functions. For run-time efficiency, some data types are encoded + * or decoded inline. */ -static inline __be32 * -xdr_encode_time3(__be32 *p, struct timespec *timep) + +static void encode_uint32(struct xdr_stream *xdr, u32 value) { - *p++ = htonl(timep->tv_sec); - *p++ = htonl(timep->tv_nsec); - return p; + __be32 *p = xdr_reserve_space(xdr, 4); + *p = cpu_to_be32(value); } -static inline __be32 * -xdr_decode_time3(__be32 *p, struct timespec *timep) +static int decode_uint32(struct xdr_stream *xdr, u32 *value) { - timep->tv_sec = ntohl(*p++); - timep->tv_nsec = ntohl(*p++); - return p; + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + *value = be32_to_cpup(p); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } -static __be32 * -xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) +static int decode_uint64(struct xdr_stream *xdr, u64 *value) { - unsigned int type, major, minor; - umode_t fmode; + __be32 *p; - type = ntohl(*p++); - if (type > NF3FIFO) - type = NF3NON; - fmode = nfs_type2fmt[type]; - fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode; - fattr->nlink = ntohl(*p++); - fattr->uid = ntohl(*p++); - fattr->gid = ntohl(*p++); - p = xdr_decode_hyper(p, &fattr->size); - p = xdr_decode_hyper(p, &fattr->du.nfs3.used); - - /* Turn remote device info into Linux-specific dev_t */ - major = ntohl(*p++); - minor = ntohl(*p++); - fattr->rdev = MKDEV(major, minor); - if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor) - fattr->rdev = 0; + p = xdr_inline_decode(xdr, 8); + if (unlikely(p == NULL)) + goto out_overflow; + xdr_decode_hyper(p, value); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} - p = xdr_decode_hyper(p, &fattr->fsid.major); - fattr->fsid.minor = 0; - p = xdr_decode_hyper(p, &fattr->fileid); - p = xdr_decode_time3(p, &fattr->atime); - p = xdr_decode_time3(p, &fattr->mtime); - p = xdr_decode_time3(p, &fattr->ctime); +/* + * fileid3 + * + * typedef uint64 fileid3; + */ +static __be32 *xdr_decode_fileid3(__be32 *p, u64 *fileid) +{ + return xdr_decode_hyper(p, fileid); +} - /* Update the mode bits */ - fattr->valid |= NFS_ATTR_FATTR_V3; - return p; +static int decode_fileid3(struct xdr_stream *xdr, u64 *fileid) +{ + return decode_uint64(xdr, fileid); } -static inline __be32 * -xdr_encode_sattr(__be32 *p, struct iattr *attr) +/* + * filename3 + * + * typedef string filename3<>; + */ +static void encode_filename3(struct xdr_stream *xdr, + const char *name, u32 length) { - if (attr->ia_valid & ATTR_MODE) { - *p++ = xdr_one; - *p++ = htonl(attr->ia_mode & S_IALLUGO); - } else { - *p++ = xdr_zero; - } - if (attr->ia_valid & ATTR_UID) { - *p++ = xdr_one; - *p++ = htonl(attr->ia_uid); - } else { - *p++ = xdr_zero; - } - if (attr->ia_valid & ATTR_GID) { - *p++ = xdr_one; - *p++ = htonl(attr->ia_gid); - } else { - *p++ = xdr_zero; - } - if (attr->ia_valid & ATTR_SIZE) { - *p++ = xdr_one; - p = xdr_encode_hyper(p, (__u64) attr->ia_size); - } else { - *p++ = xdr_zero; - } - if (attr->ia_valid & ATTR_ATIME_SET) { - *p++ = xdr_two; - p = xdr_encode_time3(p, &attr->ia_atime); - } else if (attr->ia_valid & ATTR_ATIME) { - *p++ = xdr_one; - } else { - *p++ = xdr_zero; - } - if (attr->ia_valid & ATTR_MTIME_SET) { - *p++ = xdr_two; - p = xdr_encode_time3(p, &attr->ia_mtime); - } else if (attr->ia_valid & ATTR_MTIME) { - *p++ = xdr_one; - } else { - *p++ = xdr_zero; - } - return p; + __be32 *p; + + BUG_ON(length > NFS3_MAXNAMLEN); + p = xdr_reserve_space(xdr, 4 + length); + xdr_encode_opaque(p, name, length); } -static inline __be32 * -xdr_decode_wcc_attr(__be32 *p, struct nfs_fattr *fattr) +static int decode_inline_filename3(struct xdr_stream *xdr, + const char **name, u32 *length) { - p = xdr_decode_hyper(p, &fattr->pre_size); - p = xdr_decode_time3(p, &fattr->pre_mtime); - p = xdr_decode_time3(p, &fattr->pre_ctime); - fattr->valid |= NFS_ATTR_FATTR_PRESIZE - | NFS_ATTR_FATTR_PREMTIME - | NFS_ATTR_FATTR_PRECTIME; - return p; + __be32 *p; + u32 count; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + count = be32_to_cpup(p); + if (count > NFS3_MAXNAMLEN) + goto out_nametoolong; + p = xdr_inline_decode(xdr, count); + if (unlikely(p == NULL)) + goto out_overflow; + *name = (const char *)p; + *length = count; + return 0; + +out_nametoolong: + dprintk("NFS: returned filename too long: %u\n", count); + return -ENAMETOOLONG; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } -static inline __be32 * -xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr) +/* + * nfspath3 + * + * typedef string nfspath3<>; + */ +static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages, + const u32 length) { - if (*p++) - p = xdr_decode_fattr(p, fattr); - return p; + BUG_ON(length > NFS3_MAXPATHLEN); + encode_uint32(xdr, length); + xdr_write_pages(xdr, pages, 0, length); } -static inline __be32 * -xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr) +static int decode_nfspath3(struct xdr_stream *xdr) { - if (*p++) - return xdr_decode_wcc_attr(p, fattr); - return p; + u32 recvd, count; + size_t hdrlen; + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + count = be32_to_cpup(p); + if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN)) + goto out_nametoolong; + hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; + recvd = xdr->buf->len - hdrlen; + if (unlikely(count > recvd)) + goto out_cheating; + + xdr_read_pages(xdr, count); + xdr_terminate_string(xdr->buf, count); + return 0; + +out_nametoolong: + dprintk("NFS: returned pathname too long: %u\n", count); + return -ENAMETOOLONG; +out_cheating: + dprintk("NFS: server cheating in pathname result: " + "count %u > recvd %u\n", count, recvd); + return -EIO; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } +/* + * cookie3 + * + * typedef uint64 cookie3 + */ +static __be32 *xdr_encode_cookie3(__be32 *p, u64 cookie) +{ + return xdr_encode_hyper(p, cookie); +} -static inline __be32 * -xdr_decode_wcc_data(__be32 *p, struct nfs_fattr *fattr) +static int decode_cookie3(struct xdr_stream *xdr, u64 *cookie) { - p = xdr_decode_pre_op_attr(p, fattr); - return xdr_decode_post_op_attr(p, fattr); + return decode_uint64(xdr, cookie); } /* - * NFS encode functions + * cookieverf3 + * + * typedef opaque cookieverf3[NFS3_COOKIEVERFSIZE]; */ +static __be32 *xdr_encode_cookieverf3(__be32 *p, const __be32 *verifier) +{ + memcpy(p, verifier, NFS3_COOKIEVERFSIZE); + return p + XDR_QUADLEN(NFS3_COOKIEVERFSIZE); +} + +static int decode_cookieverf3(struct xdr_stream *xdr, __be32 *verifier) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE); + if (unlikely(p == NULL)) + goto out_overflow; + memcpy(verifier, p, NFS3_COOKIEVERFSIZE); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} /* - * Encode file handle argument + * createverf3 + * + * typedef opaque createverf3[NFS3_CREATEVERFSIZE]; */ -static int -nfs3_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh) +static void encode_createverf3(struct xdr_stream *xdr, const __be32 *verifier) { - p = xdr_encode_fhandle(p, fh); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + __be32 *p; + + p = xdr_reserve_space(xdr, NFS3_CREATEVERFSIZE); + memcpy(p, verifier, NFS3_CREATEVERFSIZE); +} + +static int decode_writeverf3(struct xdr_stream *xdr, __be32 *verifier) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE); + if (unlikely(p == NULL)) + goto out_overflow; + memcpy(verifier, p, NFS3_WRITEVERFSIZE); return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } /* - * Encode SETATTR arguments + * size3 + * + * typedef uint64 size3; */ -static int -nfs3_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs3_sattrargs *args) -{ - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_sattr(p, args->sattr); - *p++ = htonl(args->guard); - if (args->guard) - p = xdr_encode_time3(p, &args->guardtime); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - return 0; +static __be32 *xdr_decode_size3(__be32 *p, u64 *size) +{ + return xdr_decode_hyper(p, size); } /* - * Encode directory ops argument + * nfsstat3 + * + * enum nfsstat3 { + * NFS3_OK = 0, + * ... + * } */ -static int -nfs3_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs3_diropargs *args) +#define NFS3_OK NFS_OK + +static int decode_nfsstat3(struct xdr_stream *xdr, enum nfs_stat *status) { - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_array(p, args->name, args->len); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + *status = be32_to_cpup(p); return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } /* - * Encode REMOVE argument + * ftype3 + * + * enum ftype3 { + * NF3REG = 1, + * NF3DIR = 2, + * NF3BLK = 3, + * NF3CHR = 4, + * NF3LNK = 5, + * NF3SOCK = 6, + * NF3FIFO = 7 + * }; */ -static int -nfs3_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args) +static void encode_ftype3(struct xdr_stream *xdr, const u32 type) { - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_array(p, args->name.name, args->name.len); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - return 0; + BUG_ON(type > NF3FIFO); + encode_uint32(xdr, type); +} + +static __be32 *xdr_decode_ftype3(__be32 *p, umode_t *mode) +{ + u32 type; + + type = be32_to_cpup(p++); + if (type > NF3FIFO) + type = NF3NON; + *mode = nfs_type2fmt[type]; + return p; } /* - * Encode access() argument + * specdata3 + * + * struct specdata3 { + * uint32 specdata1; + * uint32 specdata2; + * }; */ -static int -nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *args) +static void encode_specdata3(struct xdr_stream *xdr, const dev_t rdev) { - p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->access); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - return 0; + __be32 *p; + + p = xdr_reserve_space(xdr, 8); + *p++ = cpu_to_be32(MAJOR(rdev)); + *p = cpu_to_be32(MINOR(rdev)); +} + +static __be32 *xdr_decode_specdata3(__be32 *p, dev_t *rdev) +{ + unsigned int major, minor; + + major = be32_to_cpup(p++); + minor = be32_to_cpup(p++); + *rdev = MKDEV(major, minor); + if (MAJOR(*rdev) != major || MINOR(*rdev) != minor) + *rdev = 0; + return p; } /* - * Arguments to a READ call. Since we read data directly into the page - * cache, we also set up the reply iovec here so that iov[1] points - * exactly to the page we want to fetch. + * nfs_fh3 + * + * struct nfs_fh3 { + * opaque data<NFS3_FHSIZE>; + * }; */ -static int -nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) +static void encode_nfs_fh3(struct xdr_stream *xdr, const struct nfs_fh *fh) { - struct rpc_auth *auth = req->rq_cred->cr_auth; - unsigned int replen; - u32 count = args->count; + __be32 *p; - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_hyper(p, args->offset); - *p++ = htonl(count); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + BUG_ON(fh->size > NFS3_FHSIZE); + p = xdr_reserve_space(xdr, 4 + fh->size); + xdr_encode_opaque(p, fh->data, fh->size); +} - /* Inline the page array */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, - args->pages, args->pgbase, count); - req->rq_rcv_buf.flags |= XDRBUF_READ; +static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh) +{ + u32 length; + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + length = be32_to_cpup(p++); + if (unlikely(length > NFS3_FHSIZE)) + goto out_toobig; + p = xdr_inline_decode(xdr, length); + if (unlikely(p == NULL)) + goto out_overflow; + fh->size = length; + memcpy(fh->data, p, length); return 0; +out_toobig: + dprintk("NFS: file handle size (%u) too big\n", length); + return -E2BIG; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static void zero_nfs_fh3(struct nfs_fh *fh) +{ + memset(fh, 0, sizeof(*fh)); } /* - * Write arguments. Splice the buffer to be written into the iovec. + * nfstime3 + * + * struct nfstime3 { + * uint32 seconds; + * uint32 nseconds; + * }; */ -static int -nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) +static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec *timep) { - struct xdr_buf *sndbuf = &req->rq_snd_buf; - u32 count = args->count; + *p++ = cpu_to_be32(timep->tv_sec); + *p++ = cpu_to_be32(timep->tv_nsec); + return p; +} - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_hyper(p, args->offset); - *p++ = htonl(count); - *p++ = htonl(args->stable); - *p++ = htonl(count); - sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); - - /* Copy the page array */ - xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); - sndbuf->flags |= XDRBUF_WRITE; - return 0; +static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep) +{ + timep->tv_sec = be32_to_cpup(p++); + timep->tv_nsec = be32_to_cpup(p++); + return p; } /* - * Encode CREATE arguments + * sattr3 + * + * enum time_how { + * DONT_CHANGE = 0, + * SET_TO_SERVER_TIME = 1, + * SET_TO_CLIENT_TIME = 2 + * }; + * + * union set_mode3 switch (bool set_it) { + * case TRUE: + * mode3 mode; + * default: + * void; + * }; + * + * union set_uid3 switch (bool set_it) { + * case TRUE: + * uid3 uid; + * default: + * void; + * }; + * + * union set_gid3 switch (bool set_it) { + * case TRUE: + * gid3 gid; + * default: + * void; + * }; + * + * union set_size3 switch (bool set_it) { + * case TRUE: + * size3 size; + * default: + * void; + * }; + * + * union set_atime switch (time_how set_it) { + * case SET_TO_CLIENT_TIME: + * nfstime3 atime; + * default: + * void; + * }; + * + * union set_mtime switch (time_how set_it) { + * case SET_TO_CLIENT_TIME: + * nfstime3 mtime; + * default: + * void; + * }; + * + * struct sattr3 { + * set_mode3 mode; + * set_uid3 uid; + * set_gid3 gid; + * set_size3 size; + * set_atime atime; + * set_mtime mtime; + * }; */ -static int -nfs3_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs3_createargs *args) +static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr) { - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_array(p, args->name, args->len); + u32 nbytes; + __be32 *p; - *p++ = htonl(args->createmode); - if (args->createmode == NFS3_CREATE_EXCLUSIVE) { - *p++ = args->verifier[0]; - *p++ = args->verifier[1]; + /* + * In order to make only a single xdr_reserve_space() call, + * pre-compute the total number of bytes to be reserved. + * Six boolean values, one for each set_foo field, are always + * present in the encoded result, so start there. + */ + nbytes = 6 * 4; + if (attr->ia_valid & ATTR_MODE) + nbytes += 4; + if (attr->ia_valid & ATTR_UID) + nbytes += 4; + if (attr->ia_valid & ATTR_GID) + nbytes += 4; + if (attr->ia_valid & ATTR_SIZE) + nbytes += 8; + if (attr->ia_valid & ATTR_ATIME_SET) + nbytes += 8; + if (attr->ia_valid & ATTR_MTIME_SET) + nbytes += 8; + p = xdr_reserve_space(xdr, nbytes); + + if (attr->ia_valid & ATTR_MODE) { + *p++ = xdr_one; + *p++ = cpu_to_be32(attr->ia_mode & S_IALLUGO); } else - p = xdr_encode_sattr(p, args->sattr); + *p++ = xdr_zero; - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - return 0; + if (attr->ia_valid & ATTR_UID) { + *p++ = xdr_one; + *p++ = cpu_to_be32(attr->ia_uid); + } else + *p++ = xdr_zero; + + if (attr->ia_valid & ATTR_GID) { + *p++ = xdr_one; + *p++ = cpu_to_be32(attr->ia_gid); + } else + *p++ = xdr_zero; + + if (attr->ia_valid & ATTR_SIZE) { + *p++ = xdr_one; + p = xdr_encode_hyper(p, (u64)attr->ia_size); + } else + *p++ = xdr_zero; + + if (attr->ia_valid & ATTR_ATIME_SET) { + *p++ = xdr_two; + p = xdr_encode_nfstime3(p, &attr->ia_atime); + } else if (attr->ia_valid & ATTR_ATIME) { + *p++ = xdr_one; + } else + *p++ = xdr_zero; + + if (attr->ia_valid & ATTR_MTIME_SET) { + *p++ = xdr_two; + xdr_encode_nfstime3(p, &attr->ia_mtime); + } else if (attr->ia_valid & ATTR_MTIME) { + *p = xdr_one; + } else + *p = xdr_zero; } /* - * Encode MKDIR arguments + * fattr3 + * + * struct fattr3 { + * ftype3 type; + * mode3 mode; + * uint32 nlink; + * uid3 uid; + * gid3 gid; + * size3 size; + * size3 used; + * specdata3 rdev; + * uint64 fsid; + * fileid3 fileid; + * nfstime3 atime; + * nfstime3 mtime; + * nfstime3 ctime; + * }; */ -static int -nfs3_xdr_mkdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mkdirargs *args) +static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr) { - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_array(p, args->name, args->len); - p = xdr_encode_sattr(p, args->sattr); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + umode_t fmode; + __be32 *p; + + p = xdr_inline_decode(xdr, NFS3_fattr_sz << 2); + if (unlikely(p == NULL)) + goto out_overflow; + + p = xdr_decode_ftype3(p, &fmode); + + fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode; + fattr->nlink = be32_to_cpup(p++); + fattr->uid = be32_to_cpup(p++); + fattr->gid = be32_to_cpup(p++); + + p = xdr_decode_size3(p, &fattr->size); + p = xdr_decode_size3(p, &fattr->du.nfs3.used); + p = xdr_decode_specdata3(p, &fattr->rdev); + + p = xdr_decode_hyper(p, &fattr->fsid.major); + fattr->fsid.minor = 0; + + p = xdr_decode_fileid3(p, &fattr->fileid); + p = xdr_decode_nfstime3(p, &fattr->atime); + p = xdr_decode_nfstime3(p, &fattr->mtime); + xdr_decode_nfstime3(p, &fattr->ctime); + + fattr->valid |= NFS_ATTR_FATTR_V3; return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } /* - * Encode SYMLINK arguments + * post_op_attr + * + * union post_op_attr switch (bool attributes_follow) { + * case TRUE: + * fattr3 attributes; + * case FALSE: + * void; + * }; */ -static int -nfs3_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_symlinkargs *args) +static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr) { - p = xdr_encode_fhandle(p, args->fromfh); - p = xdr_encode_array(p, args->fromname, args->fromlen); - p = xdr_encode_sattr(p, args->sattr); - *p++ = htonl(args->pathlen); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + __be32 *p; - /* Copy the page */ - xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen); + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + if (*p != xdr_zero) + return decode_fattr3(xdr, fattr); return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } /* - * Encode MKNOD arguments + * wcc_attr + * struct wcc_attr { + * size3 size; + * nfstime3 mtime; + * nfstime3 ctime; + * }; */ -static int -nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args) -{ - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_array(p, args->name, args->len); - *p++ = htonl(args->type); - p = xdr_encode_sattr(p, args->sattr); - if (args->type == NF3CHR || args->type == NF3BLK) { - *p++ = htonl(MAJOR(args->rdev)); - *p++ = htonl(MINOR(args->rdev)); - } +static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, NFS3_wcc_attr_sz << 2); + if (unlikely(p == NULL)) + goto out_overflow; + + fattr->valid |= NFS_ATTR_FATTR_PRESIZE + | NFS_ATTR_FATTR_PREMTIME + | NFS_ATTR_FATTR_PRECTIME; + + p = xdr_decode_size3(p, &fattr->pre_size); + p = xdr_decode_nfstime3(p, &fattr->pre_mtime); + xdr_decode_nfstime3(p, &fattr->pre_ctime); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } /* - * Encode RENAME arguments + * pre_op_attr + * union pre_op_attr switch (bool attributes_follow) { + * case TRUE: + * wcc_attr attributes; + * case FALSE: + * void; + * }; + * + * wcc_data + * + * struct wcc_data { + * pre_op_attr before; + * post_op_attr after; + * }; */ -static int -nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs3_renameargs *args) -{ - p = xdr_encode_fhandle(p, args->fromfh); - p = xdr_encode_array(p, args->fromname, args->fromlen); - p = xdr_encode_fhandle(p, args->tofh); - p = xdr_encode_array(p, args->toname, args->tolen); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); +static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + if (*p != xdr_zero) + return decode_wcc_attr(xdr, fattr); return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } -/* - * Encode LINK arguments - */ -static int -nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args) +static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr) { - p = xdr_encode_fhandle(p, args->fromfh); - p = xdr_encode_fhandle(p, args->tofh); - p = xdr_encode_array(p, args->toname, args->tolen); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - return 0; + int error; + + error = decode_pre_op_attr(xdr, fattr); + if (unlikely(error)) + goto out; + error = decode_post_op_attr(xdr, fattr); +out: + return error; } /* - * Encode arguments to readdir call + * post_op_fh3 + * + * union post_op_fh3 switch (bool handle_follows) { + * case TRUE: + * nfs_fh3 handle; + * case FALSE: + * void; + * }; */ -static int -nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) +static int decode_post_op_fh3(struct xdr_stream *xdr, struct nfs_fh *fh) { - struct rpc_auth *auth = req->rq_cred->cr_auth; - unsigned int replen; - u32 count = args->count; - - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_hyper(p, args->cookie); - *p++ = args->verf[0]; - *p++ = args->verf[1]; - if (args->plus) { - /* readdirplus: need dircount + buffer size. - * We just make sure we make dircount big enough */ - *p++ = htonl(count >> 3); - } - *p++ = htonl(count); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - - /* Inline the page array */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readdirres_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); + __be32 *p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + if (*p != xdr_zero) + return decode_nfs_fh3(xdr, fh); + zero_nfs_fh3(fh); return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } /* - * Decode the result of a readdir call. - * We just check for syntactical correctness. + * diropargs3 + * + * struct diropargs3 { + * nfs_fh3 dir; + * filename3 name; + * }; */ -static int -nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res) +static void encode_diropargs3(struct xdr_stream *xdr, const struct nfs_fh *fh, + const char *name, u32 length) { - struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct kvec *iov = rcvbuf->head; - struct page **page; - size_t hdrlen; - u32 len, recvd, pglen; - int status, nr = 0; - __be32 *entry, *end, *kaddr; - - status = ntohl(*p++); - /* Decode post_op_attrs */ - p = xdr_decode_post_op_attr(p, res->dir_attr); - if (status) - return nfs_stat_to_errno(status); - /* Decode verifier cookie */ - if (res->verf) { - res->verf[0] = *p++; - res->verf[1] = *p++; - } else { - p += 2; - } - - hdrlen = (u8 *) p - (u8 *) iov->iov_base; - if (iov->iov_len < hdrlen) { - dprintk("NFS: READDIR reply header overflowed:" - "length %Zu > %Zu\n", hdrlen, iov->iov_len); - return -errno_NFSERR_IO; - } else if (iov->iov_len != hdrlen) { - dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); - xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); - } + encode_nfs_fh3(xdr, fh); + encode_filename3(xdr, name, length); +} - pglen = rcvbuf->page_len; - recvd = rcvbuf->len - hdrlen; - if (pglen > recvd) - pglen = recvd; - page = rcvbuf->pages; - kaddr = p = kmap_atomic(*page, KM_USER0); - end = (__be32 *)((char *)p + pglen); - entry = p; - - /* Make sure the packet actually has a value_follows and EOF entry */ - if ((entry + 1) > end) - goto short_pkt; - - for (; *p++; nr++) { - if (p + 3 > end) - goto short_pkt; - p += 2; /* inode # */ - len = ntohl(*p++); /* string length */ - p += XDR_QUADLEN(len) + 2; /* name + cookie */ - if (len > NFS3_MAXNAMLEN) { - dprintk("NFS: giant filename in readdir (len 0x%x)!\n", - len); - goto err_unmap; - } - - if (res->plus) { - /* post_op_attr */ - if (p + 2 > end) - goto short_pkt; - if (*p++) { - p += 21; - if (p + 1 > end) - goto short_pkt; - } - /* post_op_fh3 */ - if (*p++) { - if (p + 1 > end) - goto short_pkt; - len = ntohl(*p++); - if (len > NFS3_FHSIZE) { - dprintk("NFS: giant filehandle in " - "readdir (len 0x%x)!\n", len); - goto err_unmap; - } - p += XDR_QUADLEN(len); - } - } - if (p + 2 > end) - goto short_pkt; - entry = p; - } +/* + * NFSv3 XDR encode functions + * + * NFSv3 argument types are defined in section 3.3 of RFC 1813: + * "NFS Version 3 Protocol Specification". + */ - /* - * Apparently some server sends responses that are a valid size, but - * contain no entries, and have value_follows==0 and EOF==0. For - * those, just set the EOF marker. - */ - if (!nr && entry[1] == 0) { - dprintk("NFS: readdir reply truncated!\n"); - entry[1] = 1; - } - out: - kunmap_atomic(kaddr, KM_USER0); - return nr; - short_pkt: - /* - * When we get a short packet there are 2 possibilities. We can - * return an error, or fix up the response to look like a valid - * response and return what we have so far. If there are no - * entries and the packet was short, then return -EIO. If there - * are valid entries in the response, return them and pretend that - * the call was successful, but incomplete. The caller can retry the - * readdir starting at the last cookie. - */ - entry[0] = entry[1] = 0; - if (!nr) - nr = -errno_NFSERR_IO; - goto out; -err_unmap: - nr = -errno_NFSERR_IO; - goto out; +/* + * 3.3.1 GETATTR3args + * + * struct GETATTR3args { + * nfs_fh3 object; + * }; + */ +static void nfs3_xdr_enc_getattr3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_fh *fh) +{ + encode_nfs_fh3(xdr, fh); } -__be32 * -nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) +/* + * 3.3.2 SETATTR3args + * + * union sattrguard3 switch (bool check) { + * case TRUE: + * nfstime3 obj_ctime; + * case FALSE: + * void; + * }; + * + * struct SETATTR3args { + * nfs_fh3 object; + * sattr3 new_attributes; + * sattrguard3 guard; + * }; + */ +static void encode_sattrguard3(struct xdr_stream *xdr, + const struct nfs3_sattrargs *args) { - struct nfs_entry old = *entry; - - if (!*p++) { - if (!*p) - return ERR_PTR(-EAGAIN); - entry->eof = 1; - return ERR_PTR(-EBADCOOKIE); - } - - p = xdr_decode_hyper(p, &entry->ino); - entry->len = ntohl(*p++); - entry->name = (const char *) p; - p += XDR_QUADLEN(entry->len); - entry->prev_cookie = entry->cookie; - p = xdr_decode_hyper(p, &entry->cookie); + __be32 *p; - if (plus) { - entry->fattr->valid = 0; - p = xdr_decode_post_op_attr(p, entry->fattr); - /* In fact, a post_op_fh3: */ - if (*p++) { - p = xdr_decode_fhandle(p, entry->fh); - /* Ugh -- server reply was truncated */ - if (p == NULL) { - dprintk("NFS: FH truncated\n"); - *entry = old; - return ERR_PTR(-EAGAIN); - } - } else - memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); + if (args->guard) { + p = xdr_reserve_space(xdr, 4 + 8); + *p++ = xdr_one; + xdr_encode_nfstime3(p, &args->guardtime); + } else { + p = xdr_reserve_space(xdr, 4); + *p = xdr_zero; } +} - entry->eof = !p[0] && p[1]; - return p; +static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs3_sattrargs *args) +{ + encode_nfs_fh3(xdr, args->fh); + encode_sattr3(xdr, args->sattr); + encode_sattrguard3(xdr, args); } /* - * Encode COMMIT arguments + * 3.3.3 LOOKUP3args + * + * struct LOOKUP3args { + * diropargs3 what; + * }; */ -static int -nfs3_xdr_commitargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) +static void nfs3_xdr_enc_lookup3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs3_diropargs *args) { - p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_hyper(p, args->offset); - *p++ = htonl(args->count); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - return 0; + encode_diropargs3(xdr, args->fh, args->name, args->len); } -#ifdef CONFIG_NFS_V3_ACL /* - * Encode GETACL arguments + * 3.3.4 ACCESS3args + * + * struct ACCESS3args { + * nfs_fh3 object; + * uint32 access; + * }; */ -static int -nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p, - struct nfs3_getaclargs *args) +static void encode_access3args(struct xdr_stream *xdr, + const struct nfs3_accessargs *args) { - struct rpc_auth *auth = req->rq_cred->cr_auth; - unsigned int replen; + encode_nfs_fh3(xdr, args->fh); + encode_uint32(xdr, args->access); +} - p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->mask); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); +static void nfs3_xdr_enc_access3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs3_accessargs *args) +{ + encode_access3args(xdr, args); +} - if (args->mask & (NFS_ACL | NFS_DFACL)) { - /* Inline the page array */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + - ACL3_getaclres_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, - NFSACL_MAXPAGES << PAGE_SHIFT); - } - return 0; +/* + * 3.3.5 READLINK3args + * + * struct READLINK3args { + * nfs_fh3 symlink; + * }; + */ +static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs3_readlinkargs *args) +{ + encode_nfs_fh3(xdr, args->fh); + prepare_reply_buffer(req, args->pages, args->pgbase, + args->pglen, NFS3_readlinkres_sz); } /* - * Encode SETACL arguments + * 3.3.6 READ3args + * + * struct READ3args { + * nfs_fh3 file; + * offset3 offset; + * count3 count; + * }; */ -static int -nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p, - struct nfs3_setaclargs *args) +static void encode_read3args(struct xdr_stream *xdr, + const struct nfs_readargs *args) { - struct xdr_buf *buf = &req->rq_snd_buf; - unsigned int base; - int err; + __be32 *p; - p = xdr_encode_fhandle(p, NFS_FH(args->inode)); - *p++ = htonl(args->mask); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - base = req->rq_slen; + encode_nfs_fh3(xdr, args->fh); - if (args->npages != 0) - xdr_encode_pages(buf, args->pages, 0, args->len); - else - req->rq_slen = xdr_adjust_iovec(req->rq_svec, - p + XDR_QUADLEN(args->len)); + p = xdr_reserve_space(xdr, 8 + 4); + p = xdr_encode_hyper(p, args->offset); + *p = cpu_to_be32(args->count); +} - err = nfsacl_encode(buf, base, args->inode, - (args->mask & NFS_ACL) ? - args->acl_access : NULL, 1, 0); - if (err > 0) - err = nfsacl_encode(buf, base + err, args->inode, - (args->mask & NFS_DFACL) ? - args->acl_default : NULL, 1, - NFS_ACL_DEFAULT); - return (err > 0) ? 0 : err; +static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_readargs *args) +{ + encode_read3args(xdr, args); + prepare_reply_buffer(req, args->pages, args->pgbase, + args->count, NFS3_readres_sz); + req->rq_rcv_buf.flags |= XDRBUF_READ; } -#endif /* CONFIG_NFS_V3_ACL */ /* - * NFS XDR decode functions + * 3.3.7 WRITE3args + * + * enum stable_how { + * UNSTABLE = 0, + * DATA_SYNC = 1, + * FILE_SYNC = 2 + * }; + * + * struct WRITE3args { + * nfs_fh3 file; + * offset3 offset; + * count3 count; + * stable_how stable; + * opaque data<>; + * }; */ +static void encode_write3args(struct xdr_stream *xdr, + const struct nfs_writeargs *args) +{ + __be32 *p; + + encode_nfs_fh3(xdr, args->fh); + + p = xdr_reserve_space(xdr, 8 + 4 + 4 + 4); + p = xdr_encode_hyper(p, args->offset); + *p++ = cpu_to_be32(args->count); + *p++ = cpu_to_be32(args->stable); + *p = cpu_to_be32(args->count); + xdr_write_pages(xdr, args->pages, args->pgbase, args->count); +} + +static void nfs3_xdr_enc_write3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_writeargs *args) +{ + encode_write3args(xdr, args); + xdr->buf->flags |= XDRBUF_WRITE; +} /* - * Decode attrstat reply. + * 3.3.8 CREATE3args + * + * enum createmode3 { + * UNCHECKED = 0, + * GUARDED = 1, + * EXCLUSIVE = 2 + * }; + * + * union createhow3 switch (createmode3 mode) { + * case UNCHECKED: + * case GUARDED: + * sattr3 obj_attributes; + * case EXCLUSIVE: + * createverf3 verf; + * }; + * + * struct CREATE3args { + * diropargs3 where; + * createhow3 how; + * }; */ -static int -nfs3_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) +static void encode_createhow3(struct xdr_stream *xdr, + const struct nfs3_createargs *args) { - int status; + encode_uint32(xdr, args->createmode); + switch (args->createmode) { + case NFS3_CREATE_UNCHECKED: + case NFS3_CREATE_GUARDED: + encode_sattr3(xdr, args->sattr); + break; + case NFS3_CREATE_EXCLUSIVE: + encode_createverf3(xdr, args->verifier); + break; + default: + BUG(); + } +} - if ((status = ntohl(*p++))) - return nfs_stat_to_errno(status); - xdr_decode_fattr(p, fattr); - return 0; +static void nfs3_xdr_enc_create3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs3_createargs *args) +{ + encode_diropargs3(xdr, args->fh, args->name, args->len); + encode_createhow3(xdr, args); } /* - * Decode status+wcc_data reply - * SATTR, REMOVE, RMDIR + * 3.3.9 MKDIR3args + * + * struct MKDIR3args { + * diropargs3 where; + * sattr3 attributes; + * }; */ -static int -nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) +static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs3_mkdirargs *args) { - int status; + encode_diropargs3(xdr, args->fh, args->name, args->len); + encode_sattr3(xdr, args->sattr); +} - if ((status = ntohl(*p++))) - status = nfs_stat_to_errno(status); - xdr_decode_wcc_data(p, fattr); - return status; +/* + * 3.3.10 SYMLINK3args + * + * struct symlinkdata3 { + * sattr3 symlink_attributes; + * nfspath3 symlink_data; + * }; + * + * struct SYMLINK3args { + * diropargs3 where; + * symlinkdata3 symlink; + * }; + */ +static void encode_symlinkdata3(struct xdr_stream *xdr, + const struct nfs3_symlinkargs *args) +{ + encode_sattr3(xdr, args->sattr); + encode_nfspath3(xdr, args->pages, args->pathlen); } -static int -nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res) +static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs3_symlinkargs *args) { - return nfs3_xdr_wccstat(req, p, res->dir_attr); + encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen); + encode_symlinkdata3(xdr, args); } /* - * Decode LOOKUP reply + * 3.3.11 MKNOD3args + * + * struct devicedata3 { + * sattr3 dev_attributes; + * specdata3 spec; + * }; + * + * union mknoddata3 switch (ftype3 type) { + * case NF3CHR: + * case NF3BLK: + * devicedata3 device; + * case NF3SOCK: + * case NF3FIFO: + * sattr3 pipe_attributes; + * default: + * void; + * }; + * + * struct MKNOD3args { + * diropargs3 where; + * mknoddata3 what; + * }; */ -static int -nfs3_xdr_lookupres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res) +static void encode_devicedata3(struct xdr_stream *xdr, + const struct nfs3_mknodargs *args) { - int status; + encode_sattr3(xdr, args->sattr); + encode_specdata3(xdr, args->rdev); +} - if ((status = ntohl(*p++))) { - status = nfs_stat_to_errno(status); - } else { - if (!(p = xdr_decode_fhandle(p, res->fh))) - return -errno_NFSERR_IO; - p = xdr_decode_post_op_attr(p, res->fattr); +static void encode_mknoddata3(struct xdr_stream *xdr, + const struct nfs3_mknodargs *args) +{ + encode_ftype3(xdr, args->type); + switch (args->type) { + case NF3CHR: + case NF3BLK: + encode_devicedata3(xdr, args); + break; + case NF3SOCK: + case NF3FIFO: + encode_sattr3(xdr, args->sattr); + break; + case NF3REG: + case NF3DIR: + break; + default: + BUG(); } - xdr_decode_post_op_attr(p, res->dir_attr); - return status; +} + +static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs3_mknodargs *args) +{ + encode_diropargs3(xdr, args->fh, args->name, args->len); + encode_mknoddata3(xdr, args); } /* - * Decode ACCESS reply + * 3.3.12 REMOVE3args + * + * struct REMOVE3args { + * diropargs3 object; + * }; */ -static int -nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res) +static void nfs3_xdr_enc_remove3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_removeargs *args) { - int status = ntohl(*p++); + encode_diropargs3(xdr, args->fh, args->name.name, args->name.len); +} - p = xdr_decode_post_op_attr(p, res->fattr); - if (status) - return nfs_stat_to_errno(status); - res->access = ntohl(*p++); - return 0; +/* + * 3.3.14 RENAME3args + * + * struct RENAME3args { + * diropargs3 from; + * diropargs3 to; + * }; + */ +static void nfs3_xdr_enc_rename3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_renameargs *args) +{ + const struct qstr *old = args->old_name; + const struct qstr *new = args->new_name; + + encode_diropargs3(xdr, args->old_dir, old->name, old->len); + encode_diropargs3(xdr, args->new_dir, new->name, new->len); } -static int -nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) +/* + * 3.3.15 LINK3args + * + * struct LINK3args { + * nfs_fh3 file; + * diropargs3 link; + * }; + */ +static void nfs3_xdr_enc_link3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs3_linkargs *args) { - struct rpc_auth *auth = req->rq_cred->cr_auth; - unsigned int replen; + encode_nfs_fh3(xdr, args->fromfh); + encode_diropargs3(xdr, args->tofh, args->toname, args->tolen); +} - p = xdr_encode_fhandle(p, args->fh); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); +/* + * 3.3.16 READDIR3args + * + * struct READDIR3args { + * nfs_fh3 dir; + * cookie3 cookie; + * cookieverf3 cookieverf; + * count3 count; + * }; + */ +static void encode_readdir3args(struct xdr_stream *xdr, + const struct nfs3_readdirargs *args) +{ + __be32 *p; - /* Inline the page array */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readlinkres_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen); - return 0; + encode_nfs_fh3(xdr, args->fh); + + p = xdr_reserve_space(xdr, 8 + NFS3_COOKIEVERFSIZE + 4); + p = xdr_encode_cookie3(p, args->cookie); + p = xdr_encode_cookieverf3(p, args->verf); + *p = cpu_to_be32(args->count); +} + +static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs3_readdirargs *args) +{ + encode_readdir3args(xdr, args); + prepare_reply_buffer(req, args->pages, 0, + args->count, NFS3_readdirres_sz); } /* - * Decode READLINK reply + * 3.3.17 READDIRPLUS3args + * + * struct READDIRPLUS3args { + * nfs_fh3 dir; + * cookie3 cookie; + * cookieverf3 cookieverf; + * count3 dircount; + * count3 maxcount; + * }; */ -static int -nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) +static void encode_readdirplus3args(struct xdr_stream *xdr, + const struct nfs3_readdirargs *args) { - struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct kvec *iov = rcvbuf->head; - size_t hdrlen; - u32 len, recvd; - char *kaddr; - int status; + __be32 *p; - status = ntohl(*p++); - p = xdr_decode_post_op_attr(p, fattr); + encode_nfs_fh3(xdr, args->fh); - if (status != 0) - return nfs_stat_to_errno(status); + p = xdr_reserve_space(xdr, 8 + NFS3_COOKIEVERFSIZE + 4 + 4); + p = xdr_encode_cookie3(p, args->cookie); + p = xdr_encode_cookieverf3(p, args->verf); - /* Convert length of symlink */ - len = ntohl(*p++); - if (len >= rcvbuf->page_len) { - dprintk("nfs: server returned giant symlink!\n"); - return -ENAMETOOLONG; - } + /* + * readdirplus: need dircount + buffer size. + * We just make sure we make dircount big enough + */ + *p++ = cpu_to_be32(args->count >> 3); - hdrlen = (u8 *) p - (u8 *) iov->iov_base; - if (iov->iov_len < hdrlen) { - dprintk("NFS: READLINK reply header overflowed:" - "length %Zu > %Zu\n", hdrlen, iov->iov_len); - return -errno_NFSERR_IO; - } else if (iov->iov_len != hdrlen) { - dprintk("NFS: READLINK header is short. " - "iovec will be shifted.\n"); - xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); - } - recvd = req->rq_rcv_buf.len - hdrlen; - if (recvd < len) { - dprintk("NFS: server cheating in readlink reply: " - "count %u > recvd %u\n", len, recvd); - return -EIO; - } + *p = cpu_to_be32(args->count); +} - /* NULL terminate the string we got */ - kaddr = (char*)kmap_atomic(rcvbuf->pages[0], KM_USER0); - kaddr[len+rcvbuf->page_base] = '\0'; - kunmap_atomic(kaddr, KM_USER0); - return 0; +static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs3_readdirargs *args) +{ + encode_readdirplus3args(xdr, args); + prepare_reply_buffer(req, args->pages, 0, + args->count, NFS3_readdirres_sz); } /* - * Decode READ reply + * 3.3.21 COMMIT3args + * + * struct COMMIT3args { + * nfs_fh3 file; + * offset3 offset; + * count3 count; + * }; */ -static int -nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) +static void encode_commit3args(struct xdr_stream *xdr, + const struct nfs_writeargs *args) { - struct kvec *iov = req->rq_rcv_buf.head; - size_t hdrlen; - u32 count, ocount, recvd; - int status; + __be32 *p; - status = ntohl(*p++); - p = xdr_decode_post_op_attr(p, res->fattr); + encode_nfs_fh3(xdr, args->fh); - if (status != 0) - return nfs_stat_to_errno(status); + p = xdr_reserve_space(xdr, 8 + 4); + p = xdr_encode_hyper(p, args->offset); + *p = cpu_to_be32(args->count); +} - /* Decode reply count and EOF flag. NFSv3 is somewhat redundant - * in that it puts the count both in the res struct and in the - * opaque data count. */ - count = ntohl(*p++); - res->eof = ntohl(*p++); - ocount = ntohl(*p++); +static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs_writeargs *args) +{ + encode_commit3args(xdr, args); +} - if (ocount != count) { - dprintk("NFS: READ count doesn't match RPC opaque count.\n"); - return -errno_NFSERR_IO; - } +#ifdef CONFIG_NFS_V3_ACL - hdrlen = (u8 *) p - (u8 *) iov->iov_base; - if (iov->iov_len < hdrlen) { - dprintk("NFS: READ reply header overflowed:" - "length %Zu > %Zu\n", hdrlen, iov->iov_len); - return -errno_NFSERR_IO; - } else if (iov->iov_len != hdrlen) { - dprintk("NFS: READ header is short. iovec will be shifted.\n"); - xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen); - } +static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs3_getaclargs *args) +{ + encode_nfs_fh3(xdr, args->fh); + encode_uint32(xdr, args->mask); + if (args->mask & (NFS_ACL | NFS_DFACL)) + prepare_reply_buffer(req, args->pages, 0, + NFSACL_MAXPAGES << PAGE_SHIFT, + ACL3_getaclres_sz); +} - recvd = req->rq_rcv_buf.len - hdrlen; - if (count > recvd) { - dprintk("NFS: server cheating in read reply: " - "count %u > recvd %u\n", count, recvd); - count = recvd; - res->eof = 0; - } +static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs3_setaclargs *args) +{ + unsigned int base; + int error; - if (count < res->count) - res->count = count; + encode_nfs_fh3(xdr, NFS_FH(args->inode)); + encode_uint32(xdr, args->mask); - return count; + base = req->rq_slen; + if (args->npages != 0) + xdr_write_pages(xdr, args->pages, 0, args->len); + else + xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE); + + error = nfsacl_encode(xdr->buf, base, args->inode, + (args->mask & NFS_ACL) ? + args->acl_access : NULL, 1, 0); + BUG_ON(error < 0); + error = nfsacl_encode(xdr->buf, base + error, args->inode, + (args->mask & NFS_DFACL) ? + args->acl_default : NULL, 1, + NFS_ACL_DEFAULT); + BUG_ON(error < 0); } +#endif /* CONFIG_NFS_V3_ACL */ + /* - * Decode WRITE response + * NFSv3 XDR decode functions + * + * NFSv3 result types are defined in section 3.3 of RFC 1813: + * "NFS Version 3 Protocol Specification". */ -static int -nfs3_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res) -{ - int status; - status = ntohl(*p++); - p = xdr_decode_wcc_data(p, res->fattr); +/* + * 3.3.1 GETATTR3res + * + * struct GETATTR3resok { + * fattr3 obj_attributes; + * }; + * + * union GETATTR3res switch (nfsstat3 status) { + * case NFS3_OK: + * GETATTR3resok resok; + * default: + * void; + * }; + */ +static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_fattr *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_default; + error = decode_fattr3(xdr, result); +out: + return error; +out_default: + return nfs_stat_to_errno(status); +} - if (status != 0) - return nfs_stat_to_errno(status); +/* + * 3.3.2 SETATTR3res + * + * struct SETATTR3resok { + * wcc_data obj_wcc; + * }; + * + * struct SETATTR3resfail { + * wcc_data obj_wcc; + * }; + * + * union SETATTR3res switch (nfsstat3 status) { + * case NFS3_OK: + * SETATTR3resok resok; + * default: + * SETATTR3resfail resfail; + * }; + */ +static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_fattr *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + error = decode_wcc_data(xdr, result); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_status; +out: + return error; +out_status: + return nfs_stat_to_errno(status); +} - res->count = ntohl(*p++); - res->verf->committed = (enum nfs3_stable_how)ntohl(*p++); - res->verf->verifier[0] = *p++; - res->verf->verifier[1] = *p++; +/* + * 3.3.3 LOOKUP3res + * + * struct LOOKUP3resok { + * nfs_fh3 object; + * post_op_attr obj_attributes; + * post_op_attr dir_attributes; + * }; + * + * struct LOOKUP3resfail { + * post_op_attr dir_attributes; + * }; + * + * union LOOKUP3res switch (nfsstat3 status) { + * case NFS3_OK: + * LOOKUP3resok resok; + * default: + * LOOKUP3resfail resfail; + * }; + */ +static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs3_diropres *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_default; + error = decode_nfs_fh3(xdr, result->fh); + if (unlikely(error)) + goto out; + error = decode_post_op_attr(xdr, result->fattr); + if (unlikely(error)) + goto out; + error = decode_post_op_attr(xdr, result->dir_attr); +out: + return error; +out_default: + error = decode_post_op_attr(xdr, result->dir_attr); + if (unlikely(error)) + goto out; + return nfs_stat_to_errno(status); +} - return res->count; +/* + * 3.3.4 ACCESS3res + * + * struct ACCESS3resok { + * post_op_attr obj_attributes; + * uint32 access; + * }; + * + * struct ACCESS3resfail { + * post_op_attr obj_attributes; + * }; + * + * union ACCESS3res switch (nfsstat3 status) { + * case NFS3_OK: + * ACCESS3resok resok; + * default: + * ACCESS3resfail resfail; + * }; + */ +static int nfs3_xdr_dec_access3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs3_accessres *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + error = decode_post_op_attr(xdr, result->fattr); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_default; + error = decode_uint32(xdr, &result->access); +out: + return error; +out_default: + return nfs_stat_to_errno(status); } /* - * Decode a CREATE response + * 3.3.5 READLINK3res + * + * struct READLINK3resok { + * post_op_attr symlink_attributes; + * nfspath3 data; + * }; + * + * struct READLINK3resfail { + * post_op_attr symlink_attributes; + * }; + * + * union READLINK3res switch (nfsstat3 status) { + * case NFS3_OK: + * READLINK3resok resok; + * default: + * READLINK3resfail resfail; + * }; */ -static int -nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res) -{ - int status; - - status = ntohl(*p++); - if (status == 0) { - if (*p++) { - if (!(p = xdr_decode_fhandle(p, res->fh))) - return -errno_NFSERR_IO; - p = xdr_decode_post_op_attr(p, res->fattr); - } else { - memset(res->fh, 0, sizeof(*res->fh)); - /* Do decode post_op_attr but set it to NULL */ - p = xdr_decode_post_op_attr(p, res->fattr); - res->fattr->valid = 0; - } - } else { - status = nfs_stat_to_errno(status); - } - p = xdr_decode_wcc_data(p, res->dir_attr); - return status; +static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_fattr *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + error = decode_post_op_attr(xdr, result); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_default; + error = decode_nfspath3(xdr); +out: + return error; +out_default: + return nfs_stat_to_errno(status); } /* - * Decode RENAME reply + * 3.3.6 READ3res + * + * struct READ3resok { + * post_op_attr file_attributes; + * count3 count; + * bool eof; + * opaque data<>; + * }; + * + * struct READ3resfail { + * post_op_attr file_attributes; + * }; + * + * union READ3res switch (nfsstat3 status) { + * case NFS3_OK: + * READ3resok resok; + * default: + * READ3resfail resfail; + * }; */ -static int -nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs3_renameres *res) +static int decode_read3resok(struct xdr_stream *xdr, + struct nfs_readres *result) { - int status; + u32 eof, count, ocount, recvd; + size_t hdrlen; + __be32 *p; + + p = xdr_inline_decode(xdr, 4 + 4 + 4); + if (unlikely(p == NULL)) + goto out_overflow; + count = be32_to_cpup(p++); + eof = be32_to_cpup(p++); + ocount = be32_to_cpup(p++); + if (unlikely(ocount != count)) + goto out_mismatch; + hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; + recvd = xdr->buf->len - hdrlen; + if (unlikely(count > recvd)) + goto out_cheating; + +out: + xdr_read_pages(xdr, count); + result->eof = eof; + result->count = count; + return count; +out_mismatch: + dprintk("NFS: READ count doesn't match length of opaque: " + "count %u != ocount %u\n", count, ocount); + return -EIO; +out_cheating: + dprintk("NFS: server cheating in read result: " + "count %u > recvd %u\n", count, recvd); + count = recvd; + eof = 0; + goto out; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} - if ((status = ntohl(*p++)) != 0) - status = nfs_stat_to_errno(status); - p = xdr_decode_wcc_data(p, res->fromattr); - p = xdr_decode_wcc_data(p, res->toattr); - return status; +static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_readres *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + error = decode_post_op_attr(xdr, result->fattr); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_status; + error = decode_read3resok(xdr, result); +out: + return error; +out_status: + return nfs_stat_to_errno(status); } /* - * Decode LINK reply + * 3.3.7 WRITE3res + * + * enum stable_how { + * UNSTABLE = 0, + * DATA_SYNC = 1, + * FILE_SYNC = 2 + * }; + * + * struct WRITE3resok { + * wcc_data file_wcc; + * count3 count; + * stable_how committed; + * writeverf3 verf; + * }; + * + * struct WRITE3resfail { + * wcc_data file_wcc; + * }; + * + * union WRITE3res switch (nfsstat3 status) { + * case NFS3_OK: + * WRITE3resok resok; + * default: + * WRITE3resfail resfail; + * }; */ -static int -nfs3_xdr_linkres(struct rpc_rqst *req, __be32 *p, struct nfs3_linkres *res) +static int decode_write3resok(struct xdr_stream *xdr, + struct nfs_writeres *result) { - int status; + __be32 *p; + + p = xdr_inline_decode(xdr, 4 + 4 + NFS3_WRITEVERFSIZE); + if (unlikely(p == NULL)) + goto out_overflow; + result->count = be32_to_cpup(p++); + result->verf->committed = be32_to_cpup(p++); + if (unlikely(result->verf->committed > NFS_FILE_SYNC)) + goto out_badvalue; + memcpy(result->verf->verifier, p, NFS3_WRITEVERFSIZE); + return result->count; +out_badvalue: + dprintk("NFS: bad stable_how value: %u\n", result->verf->committed); + return -EIO; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} - if ((status = ntohl(*p++)) != 0) - status = nfs_stat_to_errno(status); - p = xdr_decode_post_op_attr(p, res->fattr); - p = xdr_decode_wcc_data(p, res->dir_attr); - return status; +static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_writeres *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + error = decode_wcc_data(xdr, result->fattr); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_status; + error = decode_write3resok(xdr, result); +out: + return error; +out_status: + return nfs_stat_to_errno(status); } /* - * Decode FSSTAT reply + * 3.3.8 CREATE3res + * + * struct CREATE3resok { + * post_op_fh3 obj; + * post_op_attr obj_attributes; + * wcc_data dir_wcc; + * }; + * + * struct CREATE3resfail { + * wcc_data dir_wcc; + * }; + * + * union CREATE3res switch (nfsstat3 status) { + * case NFS3_OK: + * CREATE3resok resok; + * default: + * CREATE3resfail resfail; + * }; */ -static int -nfs3_xdr_fsstatres(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *res) +static int decode_create3resok(struct xdr_stream *xdr, + struct nfs3_diropres *result) { - int status; - - status = ntohl(*p++); + int error; + + error = decode_post_op_fh3(xdr, result->fh); + if (unlikely(error)) + goto out; + error = decode_post_op_attr(xdr, result->fattr); + if (unlikely(error)) + goto out; + /* The server isn't required to return a file handle. + * If it didn't, force the client to perform a LOOKUP + * to determine the correct file handle and attribute + * values for the new object. */ + if (result->fh->size == 0) + result->fattr->valid = 0; + error = decode_wcc_data(xdr, result->dir_attr); +out: + return error; +} - p = xdr_decode_post_op_attr(p, res->fattr); - if (status != 0) - return nfs_stat_to_errno(status); +static int nfs3_xdr_dec_create3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs3_diropres *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_default; + error = decode_create3resok(xdr, result); +out: + return error; +out_default: + error = decode_wcc_data(xdr, result->dir_attr); + if (unlikely(error)) + goto out; + return nfs_stat_to_errno(status); +} - p = xdr_decode_hyper(p, &res->tbytes); - p = xdr_decode_hyper(p, &res->fbytes); - p = xdr_decode_hyper(p, &res->abytes); - p = xdr_decode_hyper(p, &res->tfiles); - p = xdr_decode_hyper(p, &res->ffiles); - p = xdr_decode_hyper(p, &res->afiles); +/* + * 3.3.12 REMOVE3res + * + * struct REMOVE3resok { + * wcc_data dir_wcc; + * }; + * + * struct REMOVE3resfail { + * wcc_data dir_wcc; + * }; + * + * union REMOVE3res switch (nfsstat3 status) { + * case NFS3_OK: + * REMOVE3resok resok; + * default: + * REMOVE3resfail resfail; + * }; + */ +static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_removeres *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + error = decode_wcc_data(xdr, result->dir_attr); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_status; +out: + return error; +out_status: + return nfs_stat_to_errno(status); +} - /* ignore invarsec */ - return 0; +/* + * 3.3.14 RENAME3res + * + * struct RENAME3resok { + * wcc_data fromdir_wcc; + * wcc_data todir_wcc; + * }; + * + * struct RENAME3resfail { + * wcc_data fromdir_wcc; + * wcc_data todir_wcc; + * }; + * + * union RENAME3res switch (nfsstat3 status) { + * case NFS3_OK: + * RENAME3resok resok; + * default: + * RENAME3resfail resfail; + * }; + */ +static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_renameres *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + error = decode_wcc_data(xdr, result->old_fattr); + if (unlikely(error)) + goto out; + error = decode_wcc_data(xdr, result->new_fattr); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_status; +out: + return error; +out_status: + return nfs_stat_to_errno(status); } /* - * Decode FSINFO reply + * 3.3.15 LINK3res + * + * struct LINK3resok { + * post_op_attr file_attributes; + * wcc_data linkdir_wcc; + * }; + * + * struct LINK3resfail { + * post_op_attr file_attributes; + * wcc_data linkdir_wcc; + * }; + * + * union LINK3res switch (nfsstat3 status) { + * case NFS3_OK: + * LINK3resok resok; + * default: + * LINK3resfail resfail; + * }; */ -static int -nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res) +static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs3_linkres *result) { - int status; + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + error = decode_post_op_attr(xdr, result->fattr); + if (unlikely(error)) + goto out; + error = decode_wcc_data(xdr, result->dir_attr); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_status; +out: + return error; +out_status: + return nfs_stat_to_errno(status); +} + +/** + * nfs3_decode_dirent - Decode a single NFSv3 directory entry stored in + * the local page cache + * @xdr: XDR stream where entry resides + * @entry: buffer to fill in with entry data + * @plus: boolean indicating whether this should be a readdirplus entry + * + * Returns zero if successful, otherwise a negative errno value is + * returned. + * + * This function is not invoked during READDIR reply decoding, but + * rather whenever an application invokes the getdents(2) system call + * on a directory already in our cache. + * + * 3.3.16 entry3 + * + * struct entry3 { + * fileid3 fileid; + * filename3 name; + * cookie3 cookie; + * fhandle3 filehandle; + * post_op_attr3 attributes; + * entry3 *nextentry; + * }; + * + * 3.3.17 entryplus3 + * struct entryplus3 { + * fileid3 fileid; + * filename3 name; + * cookie3 cookie; + * post_op_attr name_attributes; + * post_op_fh3 name_handle; + * entryplus3 *nextentry; + * }; + */ +int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, + int plus) +{ + struct nfs_entry old = *entry; + __be32 *p; + int error; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + if (*p == xdr_zero) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + if (*p == xdr_zero) + return -EAGAIN; + entry->eof = 1; + return -EBADCOOKIE; + } - status = ntohl(*p++); + error = decode_fileid3(xdr, &entry->ino); + if (unlikely(error)) + return error; - p = xdr_decode_post_op_attr(p, res->fattr); - if (status != 0) - return nfs_stat_to_errno(status); + error = decode_inline_filename3(xdr, &entry->name, &entry->len); + if (unlikely(error)) + return error; - res->rtmax = ntohl(*p++); - res->rtpref = ntohl(*p++); - res->rtmult = ntohl(*p++); - res->wtmax = ntohl(*p++); - res->wtpref = ntohl(*p++); - res->wtmult = ntohl(*p++); - res->dtpref = ntohl(*p++); - p = xdr_decode_hyper(p, &res->maxfilesize); + entry->prev_cookie = entry->cookie; + error = decode_cookie3(xdr, &entry->cookie); + if (unlikely(error)) + return error; + + entry->d_type = DT_UNKNOWN; + + if (plus) { + entry->fattr->valid = 0; + error = decode_post_op_attr(xdr, entry->fattr); + if (unlikely(error)) + return error; + if (entry->fattr->valid & NFS_ATTR_FATTR_V3) + entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); + + /* In fact, a post_op_fh3: */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(p == NULL)) + goto out_overflow; + if (*p != xdr_zero) { + error = decode_nfs_fh3(xdr, entry->fh); + if (unlikely(error)) { + if (error == -E2BIG) + goto out_truncated; + return error; + } + } else + zero_nfs_fh3(entry->fh); + } - /* ignore time_delta and properties */ - res->lease_time = 0; return 0; + +out_overflow: + print_overflow_msg(__func__, xdr); + return -EAGAIN; +out_truncated: + dprintk("NFS: directory entry contains invalid file handle\n"); + *entry = old; + return -EAGAIN; } /* - * Decode PATHCONF reply + * 3.3.16 READDIR3res + * + * struct dirlist3 { + * entry3 *entries; + * bool eof; + * }; + * + * struct READDIR3resok { + * post_op_attr dir_attributes; + * cookieverf3 cookieverf; + * dirlist3 reply; + * }; + * + * struct READDIR3resfail { + * post_op_attr dir_attributes; + * }; + * + * union READDIR3res switch (nfsstat3 status) { + * case NFS3_OK: + * READDIR3resok resok; + * default: + * READDIR3resfail resfail; + * }; + * + * Read the directory contents into the page cache, but otherwise + * don't touch them. The actual decoding is done by nfs3_decode_entry() + * during subsequent nfs_readdir() calls. */ -static int -nfs3_xdr_pathconfres(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *res) +static int decode_dirlist3(struct xdr_stream *xdr) { - int status; + u32 recvd, pglen; + size_t hdrlen; - status = ntohl(*p++); + pglen = xdr->buf->page_len; + hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; + recvd = xdr->buf->len - hdrlen; + if (unlikely(pglen > recvd)) + goto out_cheating; +out: + xdr_read_pages(xdr, pglen); + return pglen; +out_cheating: + dprintk("NFS: server cheating in readdir result: " + "pglen %u > recvd %u\n", pglen, recvd); + pglen = recvd; + goto out; +} - p = xdr_decode_post_op_attr(p, res->fattr); - if (status != 0) - return nfs_stat_to_errno(status); - res->max_link = ntohl(*p++); - res->max_namelen = ntohl(*p++); +static int decode_readdir3resok(struct xdr_stream *xdr, + struct nfs3_readdirres *result) +{ + int error; + + error = decode_post_op_attr(xdr, result->dir_attr); + if (unlikely(error)) + goto out; + /* XXX: do we need to check if result->verf != NULL ? */ + error = decode_cookieverf3(xdr, result->verf); + if (unlikely(error)) + goto out; + error = decode_dirlist3(xdr); +out: + return error; +} - /* ignore remaining fields */ - return 0; +static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs3_readdirres *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_default; + error = decode_readdir3resok(xdr, result); +out: + return error; +out_default: + error = decode_post_op_attr(xdr, result->dir_attr); + if (unlikely(error)) + goto out; + return nfs_stat_to_errno(status); } /* - * Decode COMMIT reply + * 3.3.18 FSSTAT3res + * + * struct FSSTAT3resok { + * post_op_attr obj_attributes; + * size3 tbytes; + * size3 fbytes; + * size3 abytes; + * size3 tfiles; + * size3 ffiles; + * size3 afiles; + * uint32 invarsec; + * }; + * + * struct FSSTAT3resfail { + * post_op_attr obj_attributes; + * }; + * + * union FSSTAT3res switch (nfsstat3 status) { + * case NFS3_OK: + * FSSTAT3resok resok; + * default: + * FSSTAT3resfail resfail; + * }; */ -static int -nfs3_xdr_commitres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res) +static int decode_fsstat3resok(struct xdr_stream *xdr, + struct nfs_fsstat *result) { - int status; + __be32 *p; + + p = xdr_inline_decode(xdr, 8 * 6 + 4); + if (unlikely(p == NULL)) + goto out_overflow; + p = xdr_decode_size3(p, &result->tbytes); + p = xdr_decode_size3(p, &result->fbytes); + p = xdr_decode_size3(p, &result->abytes); + p = xdr_decode_size3(p, &result->tfiles); + p = xdr_decode_size3(p, &result->ffiles); + xdr_decode_size3(p, &result->afiles); + /* ignore invarsec */ + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} - status = ntohl(*p++); - p = xdr_decode_wcc_data(p, res->fattr); - if (status != 0) - return nfs_stat_to_errno(status); +static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_fsstat *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + error = decode_post_op_attr(xdr, result->fattr); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_status; + error = decode_fsstat3resok(xdr, result); +out: + return error; +out_status: + return nfs_stat_to_errno(status); +} - res->verf->verifier[0] = *p++; - res->verf->verifier[1] = *p++; +/* + * 3.3.19 FSINFO3res + * + * struct FSINFO3resok { + * post_op_attr obj_attributes; + * uint32 rtmax; + * uint32 rtpref; + * uint32 rtmult; + * uint32 wtmax; + * uint32 wtpref; + * uint32 wtmult; + * uint32 dtpref; + * size3 maxfilesize; + * nfstime3 time_delta; + * uint32 properties; + * }; + * + * struct FSINFO3resfail { + * post_op_attr obj_attributes; + * }; + * + * union FSINFO3res switch (nfsstat3 status) { + * case NFS3_OK: + * FSINFO3resok resok; + * default: + * FSINFO3resfail resfail; + * }; + */ +static int decode_fsinfo3resok(struct xdr_stream *xdr, + struct nfs_fsinfo *result) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, 4 * 7 + 8 + 8 + 4); + if (unlikely(p == NULL)) + goto out_overflow; + result->rtmax = be32_to_cpup(p++); + result->rtpref = be32_to_cpup(p++); + result->rtmult = be32_to_cpup(p++); + result->wtmax = be32_to_cpup(p++); + result->wtpref = be32_to_cpup(p++); + result->wtmult = be32_to_cpup(p++); + result->dtpref = be32_to_cpup(p++); + p = xdr_decode_size3(p, &result->maxfilesize); + xdr_decode_nfstime3(p, &result->time_delta); + + /* ignore properties */ + result->lease_time = 0; return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_fsinfo *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + error = decode_post_op_attr(xdr, result->fattr); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_status; + error = decode_fsinfo3resok(xdr, result); +out: + return error; +out_status: + return nfs_stat_to_errno(status); } -#ifdef CONFIG_NFS_V3_ACL /* - * Decode GETACL reply + * 3.3.20 PATHCONF3res + * + * struct PATHCONF3resok { + * post_op_attr obj_attributes; + * uint32 linkmax; + * uint32 name_max; + * bool no_trunc; + * bool chown_restricted; + * bool case_insensitive; + * bool case_preserving; + * }; + * + * struct PATHCONF3resfail { + * post_op_attr obj_attributes; + * }; + * + * union PATHCONF3res switch (nfsstat3 status) { + * case NFS3_OK: + * PATHCONF3resok resok; + * default: + * PATHCONF3resfail resfail; + * }; */ -static int -nfs3_xdr_getaclres(struct rpc_rqst *req, __be32 *p, - struct nfs3_getaclres *res) +static int decode_pathconf3resok(struct xdr_stream *xdr, + struct nfs_pathconf *result) { - struct xdr_buf *buf = &req->rq_rcv_buf; - int status = ntohl(*p++); - struct posix_acl **acl; - unsigned int *aclcnt; - int err, base; - - if (status != 0) - return nfs_stat_to_errno(status); - p = xdr_decode_post_op_attr(p, res->fattr); - res->mask = ntohl(*p++); - if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) - return -EINVAL; - base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base; + __be32 *p; - acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL; - aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL; - err = nfsacl_decode(buf, base, aclcnt, acl); + p = xdr_inline_decode(xdr, 4 * 6); + if (unlikely(p == NULL)) + goto out_overflow; + result->max_link = be32_to_cpup(p++); + result->max_namelen = be32_to_cpup(p); + /* ignore remaining fields */ + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} - acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL; - aclcnt = (res->mask & NFS_DFACLCNT) ? &res->acl_default_count : NULL; - if (err > 0) - err = nfsacl_decode(buf, base + err, aclcnt, acl); - return (err > 0) ? 0 : err; +static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_pathconf *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + error = decode_post_op_attr(xdr, result->fattr); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_status; + error = decode_pathconf3resok(xdr, result); +out: + return error; +out_status: + return nfs_stat_to_errno(status); } /* - * Decode setacl reply. + * 3.3.21 COMMIT3res + * + * struct COMMIT3resok { + * wcc_data file_wcc; + * writeverf3 verf; + * }; + * + * struct COMMIT3resfail { + * wcc_data file_wcc; + * }; + * + * union COMMIT3res switch (nfsstat3 status) { + * case NFS3_OK: + * COMMIT3resok resok; + * default: + * COMMIT3resfail resfail; + * }; */ -static int -nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) +static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_writeres *result) { - int status = ntohl(*p++); + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + error = decode_wcc_data(xdr, result->fattr); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_status; + error = decode_writeverf3(xdr, result->verf->verifier); +out: + return error; +out_status: + return nfs_stat_to_errno(status); +} - if (status) - return nfs_stat_to_errno(status); - xdr_decode_post_op_attr(p, fattr); - return 0; +#ifdef CONFIG_NFS_V3_ACL + +static inline int decode_getacl3resok(struct xdr_stream *xdr, + struct nfs3_getaclres *result) +{ + struct posix_acl **acl; + unsigned int *aclcnt; + size_t hdrlen; + int error; + + error = decode_post_op_attr(xdr, result->fattr); + if (unlikely(error)) + goto out; + error = decode_uint32(xdr, &result->mask); + if (unlikely(error)) + goto out; + error = -EINVAL; + if (result->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + goto out; + + hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; + + acl = NULL; + if (result->mask & NFS_ACL) + acl = &result->acl_access; + aclcnt = NULL; + if (result->mask & NFS_ACLCNT) + aclcnt = &result->acl_access_count; + error = nfsacl_decode(xdr->buf, hdrlen, aclcnt, acl); + if (unlikely(error <= 0)) + goto out; + + acl = NULL; + if (result->mask & NFS_DFACL) + acl = &result->acl_default; + aclcnt = NULL; + if (result->mask & NFS_DFACLCNT) + aclcnt = &result->acl_default_count; + error = nfsacl_decode(xdr->buf, hdrlen + error, aclcnt, acl); + if (unlikely(error <= 0)) + return error; + error = 0; +out: + return error; +} + +static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs3_getaclres *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_default; + error = decode_getacl3resok(xdr, result); +out: + return error; +out_default: + return nfs_stat_to_errno(status); } + +static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_fattr *result) +{ + enum nfs_stat status; + int error; + + error = decode_nfsstat3(xdr, &status); + if (unlikely(error)) + goto out; + if (status != NFS3_OK) + goto out_default; + error = decode_post_op_attr(xdr, result); +out: + return error; +out_default: + return nfs_stat_to_errno(status); +} + #endif /* CONFIG_NFS_V3_ACL */ #define PROC(proc, argtype, restype, timer) \ [NFS3PROC_##proc] = { \ .p_proc = NFS3PROC_##proc, \ - .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ - .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ - .p_arglen = NFS3_##argtype##_sz, \ - .p_replen = NFS3_##restype##_sz, \ + .p_encode = (kxdreproc_t)nfs3_xdr_enc_##argtype##3args, \ + .p_decode = (kxdrdproc_t)nfs3_xdr_dec_##restype##3res, \ + .p_arglen = NFS3_##argtype##args_sz, \ + .p_replen = NFS3_##restype##res_sz, \ .p_timer = timer, \ .p_statidx = NFS3PROC_##proc, \ .p_name = #proc, \ } struct rpc_procinfo nfs3_procedures[] = { - PROC(GETATTR, fhandle, attrstat, 1), - PROC(SETATTR, sattrargs, wccstat, 0), - PROC(LOOKUP, diropargs, lookupres, 2), - PROC(ACCESS, accessargs, accessres, 1), - PROC(READLINK, readlinkargs, readlinkres, 3), - PROC(READ, readargs, readres, 3), - PROC(WRITE, writeargs, writeres, 4), - PROC(CREATE, createargs, createres, 0), - PROC(MKDIR, mkdirargs, createres, 0), - PROC(SYMLINK, symlinkargs, createres, 0), - PROC(MKNOD, mknodargs, createres, 0), - PROC(REMOVE, removeargs, removeres, 0), - PROC(RMDIR, diropargs, wccstat, 0), - PROC(RENAME, renameargs, renameres, 0), - PROC(LINK, linkargs, linkres, 0), - PROC(READDIR, readdirargs, readdirres, 3), - PROC(READDIRPLUS, readdirargs, readdirres, 3), - PROC(FSSTAT, fhandle, fsstatres, 0), - PROC(FSINFO, fhandle, fsinfores, 0), - PROC(PATHCONF, fhandle, pathconfres, 0), - PROC(COMMIT, commitargs, commitres, 5), + PROC(GETATTR, getattr, getattr, 1), + PROC(SETATTR, setattr, setattr, 0), + PROC(LOOKUP, lookup, lookup, 2), + PROC(ACCESS, access, access, 1), + PROC(READLINK, readlink, readlink, 3), + PROC(READ, read, read, 3), + PROC(WRITE, write, write, 4), + PROC(CREATE, create, create, 0), + PROC(MKDIR, mkdir, create, 0), + PROC(SYMLINK, symlink, create, 0), + PROC(MKNOD, mknod, create, 0), + PROC(REMOVE, remove, remove, 0), + PROC(RMDIR, lookup, setattr, 0), + PROC(RENAME, rename, rename, 0), + PROC(LINK, link, link, 0), + PROC(READDIR, readdir, readdir, 3), + PROC(READDIRPLUS, readdirplus, readdir, 3), + PROC(FSSTAT, getattr, fsstat, 0), + PROC(FSINFO, getattr, fsinfo, 0), + PROC(PATHCONF, getattr, pathconf, 0), + PROC(COMMIT, commit, commit, 5), }; struct rpc_version nfs_version3 = { @@ -1181,8 +2471,8 @@ struct rpc_version nfs_version3 = { static struct rpc_procinfo nfs3_acl_procedures[] = { [ACLPROC3_GETACL] = { .p_proc = ACLPROC3_GETACL, - .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, - .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, + .p_encode = (kxdreproc_t)nfs3_xdr_enc_getacl3args, + .p_decode = (kxdrdproc_t)nfs3_xdr_dec_getacl3res, .p_arglen = ACL3_getaclargs_sz, .p_replen = ACL3_getaclres_sz, .p_timer = 1, @@ -1190,8 +2480,8 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { }, [ACLPROC3_SETACL] = { .p_proc = ACLPROC3_SETACL, - .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, - .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, + .p_encode = (kxdreproc_t)nfs3_xdr_enc_setacl3args, + .p_decode = (kxdrdproc_t)nfs3_xdr_dec_setacl3res, .p_arglen = ACL3_setaclargs_sz, .p_replen = ACL3_setaclres_sz, .p_timer = 0, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 311e15cc8af..7a747407314 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -44,6 +44,7 @@ enum nfs4_client_state { NFS4CLNT_RECLAIM_REBOOT, NFS4CLNT_RECLAIM_NOGRACE, NFS4CLNT_DELEGRETURN, + NFS4CLNT_LAYOUTRECALL, NFS4CLNT_SESSION_RESET, NFS4CLNT_RECALL_SLOT, }; @@ -109,7 +110,7 @@ struct nfs_unique_id { struct nfs4_state_owner { struct nfs_unique_id so_owner_id; struct nfs_server *so_server; - struct rb_node so_client_node; + struct rb_node so_server_node; struct rpc_cred *so_cred; /* Associated cred */ @@ -227,12 +228,6 @@ struct nfs4_state_maintenance_ops { extern const struct dentry_operations nfs4_dentry_operations; extern const struct inode_operations nfs4_dir_inode_operations; -/* inode.c */ -extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t); -extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int); -extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); - - /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); @@ -241,13 +236,12 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); -extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait); -extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); -extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); +extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, struct nfs4_fs_locations *fs_locations, struct page *page); extern void nfs4_release_lockowner(const struct nfs4_lock_state *); +extern const struct xattr_handler *nfs4_xattr_handlers[]; #if defined(CONFIG_NFS_V4_1) static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) @@ -333,7 +327,6 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid); extern const nfs4_stateid zero_stateid; /* nfs4xdr.c */ -extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; struct nfs4_mount_data; diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c new file mode 100644 index 00000000000..23f930caf1e --- /dev/null +++ b/fs/nfs/nfs4filelayout.c @@ -0,0 +1,280 @@ +/* + * Module for the pnfs nfs4 file layout driver. + * Defines all I/O and Policy interface operations, plus code + * to register itself with the pNFS client. + * + * Copyright (c) 2002 + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand <dhildebz@umich.edu> + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#include <linux/nfs_fs.h> + +#include "internal.h" +#include "nfs4filelayout.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>"); +MODULE_DESCRIPTION("The NFSv4 file layout driver"); + +static int +filelayout_set_layoutdriver(struct nfs_server *nfss) +{ + int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client, + nfs4_fl_free_deviceid_callback); + if (status) { + printk(KERN_WARNING "%s: deviceid cache could not be " + "initialized\n", __func__); + return status; + } + dprintk("%s: deviceid cache has been initialized successfully\n", + __func__); + return 0; +} + +/* Clear out the layout by destroying its device list */ +static int +filelayout_clear_layoutdriver(struct nfs_server *nfss) +{ + dprintk("--> %s\n", __func__); + + if (nfss->nfs_client->cl_devid_cache) + pnfs_put_deviceid_cache(nfss->nfs_client); + return 0; +} + +/* + * filelayout_check_layout() + * + * Make sure layout segment parameters are sane WRT the device. + * At this point no generic layer initialization of the lseg has occurred, + * and nothing has been added to the layout_hdr cache. + * + */ +static int +filelayout_check_layout(struct pnfs_layout_hdr *lo, + struct nfs4_filelayout_segment *fl, + struct nfs4_layoutget_res *lgr, + struct nfs4_deviceid *id) +{ + struct nfs4_file_layout_dsaddr *dsaddr; + int status = -EINVAL; + struct nfs_server *nfss = NFS_SERVER(lo->plh_inode); + + dprintk("--> %s\n", __func__); + + if (fl->pattern_offset > lgr->range.offset) { + dprintk("%s pattern_offset %lld to large\n", + __func__, fl->pattern_offset); + goto out; + } + + if (fl->stripe_unit % PAGE_SIZE) { + dprintk("%s Stripe unit (%u) not page aligned\n", + __func__, fl->stripe_unit); + goto out; + } + + /* find and reference the deviceid */ + dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id); + if (dsaddr == NULL) { + dsaddr = get_device_info(lo->plh_inode, id); + if (dsaddr == NULL) + goto out; + } + fl->dsaddr = dsaddr; + + if (fl->first_stripe_index < 0 || + fl->first_stripe_index >= dsaddr->stripe_count) { + dprintk("%s Bad first_stripe_index %d\n", + __func__, fl->first_stripe_index); + goto out_put; + } + + if ((fl->stripe_type == STRIPE_SPARSE && + fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || + (fl->stripe_type == STRIPE_DENSE && + fl->num_fh != dsaddr->stripe_count)) { + dprintk("%s num_fh %u not valid for given packing\n", + __func__, fl->num_fh); + goto out_put; + } + + if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) { + dprintk("%s Stripe unit (%u) not aligned with rsize %u " + "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize, + nfss->wsize); + } + + status = 0; +out: + dprintk("--> %s returns %d\n", __func__, status); + return status; +out_put: + pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid); + goto out; +} + +static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl) +{ + int i; + + for (i = 0; i < fl->num_fh; i++) { + if (!fl->fh_array[i]) + break; + kfree(fl->fh_array[i]); + } + kfree(fl->fh_array); + fl->fh_array = NULL; +} + +static void +_filelayout_free_lseg(struct nfs4_filelayout_segment *fl) +{ + filelayout_free_fh_array(fl); + kfree(fl); +} + +static int +filelayout_decode_layout(struct pnfs_layout_hdr *flo, + struct nfs4_filelayout_segment *fl, + struct nfs4_layoutget_res *lgr, + struct nfs4_deviceid *id) +{ + uint32_t *p = (uint32_t *)lgr->layout.buf; + uint32_t nfl_util; + int i; + + dprintk("%s: set_layout_map Begin\n", __func__); + + memcpy(id, p, sizeof(*id)); + p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); + print_deviceid(id); + + nfl_util = be32_to_cpup(p++); + if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) + fl->commit_through_mds = 1; + if (nfl_util & NFL4_UFLG_DENSE) + fl->stripe_type = STRIPE_DENSE; + else + fl->stripe_type = STRIPE_SPARSE; + fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK; + + fl->first_stripe_index = be32_to_cpup(p++); + p = xdr_decode_hyper(p, &fl->pattern_offset); + fl->num_fh = be32_to_cpup(p++); + + dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n", + __func__, nfl_util, fl->num_fh, fl->first_stripe_index, + fl->pattern_offset); + + fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), + GFP_KERNEL); + if (!fl->fh_array) + return -ENOMEM; + + for (i = 0; i < fl->num_fh; i++) { + /* Do we want to use a mempool here? */ + fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); + if (!fl->fh_array[i]) { + filelayout_free_fh_array(fl); + return -ENOMEM; + } + fl->fh_array[i]->size = be32_to_cpup(p++); + if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { + printk(KERN_ERR "Too big fh %d received %d\n", + i, fl->fh_array[i]->size); + filelayout_free_fh_array(fl); + return -EIO; + } + memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); + p += XDR_QUADLEN(fl->fh_array[i]->size); + dprintk("DEBUG: %s: fh len %d\n", __func__, + fl->fh_array[i]->size); + } + + return 0; +} + +static struct pnfs_layout_segment * +filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, + struct nfs4_layoutget_res *lgr) +{ + struct nfs4_filelayout_segment *fl; + int rc; + struct nfs4_deviceid id; + + dprintk("--> %s\n", __func__); + fl = kzalloc(sizeof(*fl), GFP_KERNEL); + if (!fl) + return NULL; + + rc = filelayout_decode_layout(layoutid, fl, lgr, &id); + if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) { + _filelayout_free_lseg(fl); + return NULL; + } + return &fl->generic_hdr; +} + +static void +filelayout_free_lseg(struct pnfs_layout_segment *lseg) +{ + struct nfs_server *nfss = NFS_SERVER(lseg->pls_layout->plh_inode); + struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); + + dprintk("--> %s\n", __func__); + pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, + &fl->dsaddr->deviceid); + _filelayout_free_lseg(fl); +} + +static struct pnfs_layoutdriver_type filelayout_type = { + .id = LAYOUT_NFSV4_1_FILES, + .name = "LAYOUT_NFSV4_1_FILES", + .owner = THIS_MODULE, + .set_layoutdriver = filelayout_set_layoutdriver, + .clear_layoutdriver = filelayout_clear_layoutdriver, + .alloc_lseg = filelayout_alloc_lseg, + .free_lseg = filelayout_free_lseg, +}; + +static int __init nfs4filelayout_init(void) +{ + printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n", + __func__); + return pnfs_register_layoutdriver(&filelayout_type); +} + +static void __exit nfs4filelayout_exit(void) +{ + printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n", + __func__); + pnfs_unregister_layoutdriver(&filelayout_type); +} + +module_init(nfs4filelayout_init); +module_exit(nfs4filelayout_exit); diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h new file mode 100644 index 00000000000..bbf60dd2ab9 --- /dev/null +++ b/fs/nfs/nfs4filelayout.h @@ -0,0 +1,94 @@ +/* + * NFSv4 file layout driver data structures. + * + * Copyright (c) 2002 + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand <dhildebz@umich.edu> + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#ifndef FS_NFS_NFS4FILELAYOUT_H +#define FS_NFS_NFS4FILELAYOUT_H + +#include "pnfs.h" + +/* + * Field testing shows we need to support upto 4096 stripe indices. + * We store each index as a u8 (u32 on the wire) to keep the memory footprint + * reasonable. This in turn means we support a maximum of 256 + * RFC 5661 multipath_list4 structures. + */ +#define NFS4_PNFS_MAX_STRIPE_CNT 4096 +#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ + +enum stripetype4 { + STRIPE_SPARSE = 1, + STRIPE_DENSE = 2 +}; + +/* Individual ip address */ +struct nfs4_pnfs_ds { + struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ + u32 ds_ip_addr; + u32 ds_port; + struct nfs_client *ds_clp; + atomic_t ds_count; +}; + +struct nfs4_file_layout_dsaddr { + struct pnfs_deviceid_node deviceid; + u32 stripe_count; + u8 *stripe_indices; + u32 ds_num; + struct nfs4_pnfs_ds *ds_list[1]; +}; + +struct nfs4_filelayout_segment { + struct pnfs_layout_segment generic_hdr; + u32 stripe_type; + u32 commit_through_mds; + u32 stripe_unit; + u32 first_stripe_index; + u64 pattern_offset; + struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ + unsigned int num_fh; + struct nfs_fh **fh_array; +}; + +static inline struct nfs4_filelayout_segment * +FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) +{ + return container_of(lseg, + struct nfs4_filelayout_segment, + generic_hdr); +} + +extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *); +extern void print_ds(struct nfs4_pnfs_ds *ds); +extern void print_deviceid(struct nfs4_deviceid *dev_id); +extern struct nfs4_file_layout_dsaddr * +nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id); +struct nfs4_file_layout_dsaddr * +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); + +#endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c new file mode 100644 index 00000000000..f5c9b125e8c --- /dev/null +++ b/fs/nfs/nfs4filelayoutdev.c @@ -0,0 +1,453 @@ +/* + * Device operations for the pnfs nfs4 file layout driver. + * + * Copyright (c) 2002 + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand <dhildebz@umich.edu> + * Garth Goodson <Garth.Goodson@netapp.com> + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#include <linux/nfs_fs.h> +#include <linux/vmalloc.h> + +#include "internal.h" +#include "nfs4filelayout.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +/* + * Data server cache + * + * Data servers can be mapped to different device ids. + * nfs4_pnfs_ds reference counting + * - set to 1 on allocation + * - incremented when a device id maps a data server already in the cache. + * - decremented when deviceid is removed from the cache. + */ +DEFINE_SPINLOCK(nfs4_ds_cache_lock); +static LIST_HEAD(nfs4_data_server_cache); + +/* Debug routines */ +void +print_ds(struct nfs4_pnfs_ds *ds) +{ + if (ds == NULL) { + printk("%s NULL device\n", __func__); + return; + } + printk(" ip_addr %x port %hu\n" + " ref count %d\n" + " client %p\n" + " cl_exchange_flags %x\n", + ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), + atomic_read(&ds->ds_count), ds->ds_clp, + ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); +} + +void +print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr) +{ + int i; + + ifdebug(FACILITY) { + printk("%s dsaddr->ds_num %d\n", __func__, + dsaddr->ds_num); + for (i = 0; i < dsaddr->ds_num; i++) + print_ds(dsaddr->ds_list[i]); + } +} + +void print_deviceid(struct nfs4_deviceid *id) +{ + u32 *p = (u32 *)id; + + dprintk("%s: device id= [%x%x%x%x]\n", __func__, + p[0], p[1], p[2], p[3]); +} + +/* nfs4_ds_cache_lock is held */ +static struct nfs4_pnfs_ds * +_data_server_lookup_locked(u32 ip_addr, u32 port) +{ + struct nfs4_pnfs_ds *ds; + + dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", + ntohl(ip_addr), ntohs(port)); + + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { + if (ds->ds_ip_addr == ip_addr && + ds->ds_port == port) { + return ds; + } + } + return NULL; +} + +static void +destroy_ds(struct nfs4_pnfs_ds *ds) +{ + dprintk("--> %s\n", __func__); + ifdebug(FACILITY) + print_ds(ds); + + if (ds->ds_clp) + nfs_put_client(ds->ds_clp); + kfree(ds); +} + +static void +nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) +{ + struct nfs4_pnfs_ds *ds; + int i; + + print_deviceid(&dsaddr->deviceid.de_id); + + for (i = 0; i < dsaddr->ds_num; i++) { + ds = dsaddr->ds_list[i]; + if (ds != NULL) { + if (atomic_dec_and_lock(&ds->ds_count, + &nfs4_ds_cache_lock)) { + list_del_init(&ds->ds_node); + spin_unlock(&nfs4_ds_cache_lock); + destroy_ds(ds); + } + } + } + kfree(dsaddr->stripe_indices); + kfree(dsaddr); +} + +void +nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device) +{ + struct nfs4_file_layout_dsaddr *dsaddr = + container_of(device, struct nfs4_file_layout_dsaddr, deviceid); + + nfs4_fl_free_deviceid(dsaddr); +} + +static struct nfs4_pnfs_ds * +nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) +{ + struct nfs4_pnfs_ds *tmp_ds, *ds; + + ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); + if (!ds) + goto out; + + spin_lock(&nfs4_ds_cache_lock); + tmp_ds = _data_server_lookup_locked(ip_addr, port); + if (tmp_ds == NULL) { + ds->ds_ip_addr = ip_addr; + ds->ds_port = port; + atomic_set(&ds->ds_count, 1); + INIT_LIST_HEAD(&ds->ds_node); + ds->ds_clp = NULL; + list_add(&ds->ds_node, &nfs4_data_server_cache); + dprintk("%s add new data server ip 0x%x\n", __func__, + ds->ds_ip_addr); + } else { + kfree(ds); + atomic_inc(&tmp_ds->ds_count); + dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", + __func__, tmp_ds->ds_ip_addr, + atomic_read(&tmp_ds->ds_count)); + ds = tmp_ds; + } + spin_unlock(&nfs4_ds_cache_lock); +out: + return ds; +} + +/* + * Currently only support ipv4, and one multi-path address. + */ +static struct nfs4_pnfs_ds * +decode_and_add_ds(__be32 **pp, struct inode *inode) +{ + struct nfs4_pnfs_ds *ds = NULL; + char *buf; + const char *ipend, *pstr; + u32 ip_addr, port; + int nlen, rlen, i; + int tmp[2]; + __be32 *r_netid, *r_addr, *p = *pp; + + /* r_netid */ + nlen = be32_to_cpup(p++); + r_netid = p; + p += XDR_QUADLEN(nlen); + + /* r_addr */ + rlen = be32_to_cpup(p++); + r_addr = p; + p += XDR_QUADLEN(rlen); + *pp = p; + + /* Check that netid is "tcp" */ + if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) { + dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); + goto out_err; + } + + /* ipv6 length plus port is legal */ + if (rlen > INET6_ADDRSTRLEN + 8) { + dprintk("%s: Invalid address, length %d\n", __func__, + rlen); + goto out_err; + } + buf = kmalloc(rlen + 1, GFP_KERNEL); + buf[rlen] = '\0'; + memcpy(buf, r_addr, rlen); + + /* replace the port dots with dashes for the in4_pton() delimiter*/ + for (i = 0; i < 2; i++) { + char *res = strrchr(buf, '.'); + if (!res) { + dprintk("%s: Failed finding expected dots in port\n", + __func__); + goto out_free; + } + *res = '-'; + } + + /* Currently only support ipv4 address */ + if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { + dprintk("%s: Only ipv4 addresses supported\n", __func__); + goto out_free; + } + + /* port */ + pstr = ipend; + sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); + port = htons((tmp[0] << 8) | (tmp[1])); + + ds = nfs4_pnfs_ds_add(inode, ip_addr, port); + dprintk("%s: Decoded address and port %s\n", __func__, buf); +out_free: + kfree(buf); +out_err: + return ds; +} + +/* Decode opaque device data and return the result */ +static struct nfs4_file_layout_dsaddr* +decode_device(struct inode *ino, struct pnfs_device *pdev) +{ + int i, dummy; + u32 cnt, num; + u8 *indexp; + __be32 *p = (__be32 *)pdev->area, *indicesp; + struct nfs4_file_layout_dsaddr *dsaddr; + + /* Get the stripe count (number of stripe index) */ + cnt = be32_to_cpup(p++); + dprintk("%s stripe count %d\n", __func__, cnt); + if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { + printk(KERN_WARNING "%s: stripe count %d greater than " + "supported maximum %d\n", __func__, + cnt, NFS4_PNFS_MAX_STRIPE_CNT); + goto out_err; + } + + /* Check the multipath list count */ + indicesp = p; + p += XDR_QUADLEN(cnt << 2); + num = be32_to_cpup(p++); + dprintk("%s ds_num %u\n", __func__, num); + if (num > NFS4_PNFS_MAX_MULTI_CNT) { + printk(KERN_WARNING "%s: multipath count %d greater than " + "supported maximum %d\n", __func__, + num, NFS4_PNFS_MAX_MULTI_CNT); + goto out_err; + } + dsaddr = kzalloc(sizeof(*dsaddr) + + (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), + GFP_KERNEL); + if (!dsaddr) + goto out_err; + + dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL); + if (!dsaddr->stripe_indices) + goto out_err_free; + + dsaddr->stripe_count = cnt; + dsaddr->ds_num = num; + + memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id)); + + /* Go back an read stripe indices */ + p = indicesp; + indexp = &dsaddr->stripe_indices[0]; + for (i = 0; i < dsaddr->stripe_count; i++) { + *indexp = be32_to_cpup(p++); + if (*indexp >= num) + goto out_err_free; + indexp++; + } + /* Skip already read multipath list count */ + p++; + + for (i = 0; i < dsaddr->ds_num; i++) { + int j; + + dummy = be32_to_cpup(p++); /* multipath count */ + if (dummy > 1) { + printk(KERN_WARNING + "%s: Multipath count %d not supported, " + "skipping all greater than 1\n", __func__, + dummy); + } + for (j = 0; j < dummy; j++) { + if (j == 0) { + dsaddr->ds_list[i] = decode_and_add_ds(&p, ino); + if (dsaddr->ds_list[i] == NULL) + goto out_err_free; + } else { + u32 len; + /* skip extra multipath */ + len = be32_to_cpup(p++); + p += XDR_QUADLEN(len); + len = be32_to_cpup(p++); + p += XDR_QUADLEN(len); + continue; + } + } + } + return dsaddr; + +out_err_free: + nfs4_fl_free_deviceid(dsaddr); +out_err: + dprintk("%s ERROR: returning NULL\n", __func__); + return NULL; +} + +/* + * Decode the opaque device specified in 'dev' + * and add it to the list of available devices. + * If the deviceid is already cached, nfs4_add_deviceid will return + * a pointer to the cached struct and throw away the new. + */ +static struct nfs4_file_layout_dsaddr* +decode_and_add_device(struct inode *inode, struct pnfs_device *dev) +{ + struct nfs4_file_layout_dsaddr *dsaddr; + struct pnfs_deviceid_node *d; + + dsaddr = decode_device(inode, dev); + if (!dsaddr) { + printk(KERN_WARNING "%s: Could not decode or add device\n", + __func__); + return NULL; + } + + d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache, + &dsaddr->deviceid); + + return container_of(d, struct nfs4_file_layout_dsaddr, deviceid); +} + +/* + * Retrieve the information for dev_id, add it to the list + * of available devices, and return it. + */ +struct nfs4_file_layout_dsaddr * +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) +{ + struct pnfs_device *pdev = NULL; + u32 max_resp_sz; + int max_pages; + struct page **pages = NULL; + struct nfs4_file_layout_dsaddr *dsaddr = NULL; + int rc, i; + struct nfs_server *server = NFS_SERVER(inode); + + /* + * Use the session max response size as the basis for setting + * GETDEVICEINFO's maxcount + */ + max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; + max_pages = max_resp_sz >> PAGE_SHIFT; + dprintk("%s inode %p max_resp_sz %u max_pages %d\n", + __func__, inode, max_resp_sz, max_pages); + + pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL); + if (pdev == NULL) + return NULL; + + pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); + if (pages == NULL) { + kfree(pdev); + return NULL; + } + for (i = 0; i < max_pages; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) + goto out_free; + } + + /* set pdev->area */ + pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL); + if (!pdev->area) + goto out_free; + + memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); + pdev->layout_type = LAYOUT_NFSV4_1_FILES; + pdev->pages = pages; + pdev->pgbase = 0; + pdev->pglen = PAGE_SIZE * max_pages; + pdev->mincount = 0; + + rc = nfs4_proc_getdeviceinfo(server, pdev); + dprintk("%s getdevice info returns %d\n", __func__, rc); + if (rc) + goto out_free; + + /* + * Found new device, need to decode it and then add it to the + * list of known devices for this mountpoint. + */ + dsaddr = decode_and_add_device(inode, pdev); +out_free: + if (pdev->area != NULL) + vunmap(pdev->area); + for (i = 0; i < max_pages; i++) + __free_page(pages[i]); + kfree(pages); + kfree(pdev); + dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); + return dsaddr; +} + +struct nfs4_file_layout_dsaddr * +nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) +{ + struct pnfs_deviceid_node *d; + + d = pnfs_find_get_deviceid(clp->cl_devid_cache, id); + return (d == NULL) ? NULL : + container_of(d, struct nfs4_file_layout_dsaddr, deviceid); +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 089da5b5d20..78936a8f40a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -49,12 +49,15 @@ #include <linux/mount.h> #include <linux/module.h> #include <linux/sunrpc/bc_xprt.h> +#include <linux/xattr.h> +#include <linux/utsname.h> #include "nfs4_fs.h" #include "delegation.h" #include "internal.h" #include "iostat.h" #include "callback.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -129,7 +132,8 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_LEASE_TIME, - 0 + FATTR4_WORD1_TIME_DELTA + | FATTR4_WORD1_FS_LAYOUT_TYPES }; const u32 nfs4_fs_locations_bitmap[2] = { @@ -255,9 +259,6 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, nfs4_state_mark_reclaim_nograce(clp, state); goto do_state_recovery; case -NFS4ERR_STALE_STATEID: - if (state == NULL) - break; - nfs4_state_mark_reclaim_reboot(clp, state); case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_EXPIRED: goto do_state_recovery; @@ -334,10 +335,12 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp * Must be called while holding tbl->slot_tbl_lock */ static void -nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) +nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot) { + int free_slotid = free_slot - tbl->slots; int slotid = free_slotid; + BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE); /* clear used bit in bitmap */ __clear_bit(slotid, tbl->used_slots); @@ -354,9 +357,9 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) } /* - * Signal state manager thread if session is drained + * Signal state manager thread if session fore channel is drained */ -static void nfs41_check_drain_session_complete(struct nfs4_session *ses) +static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) { struct rpc_task *task; @@ -370,8 +373,20 @@ static void nfs41_check_drain_session_complete(struct nfs4_session *ses) if (ses->fc_slot_table.highest_used_slotid != -1) return; - dprintk("%s COMPLETE: Session Drained\n", __func__); - complete(&ses->complete); + dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__); + complete(&ses->fc_slot_table.complete); +} + +/* + * Signal state manager thread if session back channel is drained + */ +void nfs4_check_drain_bc_complete(struct nfs4_session *ses) +{ + if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) || + ses->bc_slot_table.highest_used_slotid != -1) + return; + dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__); + complete(&ses->bc_slot_table.complete); } static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) @@ -379,7 +394,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) struct nfs4_slot_table *tbl; tbl = &res->sr_session->fc_slot_table; - if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { + if (!res->sr_slot) { /* just wake up the next guy waiting since * we may have not consumed a slot after all */ dprintk("%s: No slot\n", __func__); @@ -387,17 +402,15 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) } spin_lock(&tbl->slot_tbl_lock); - nfs4_free_slot(tbl, res->sr_slotid); - nfs41_check_drain_session_complete(res->sr_session); + nfs4_free_slot(tbl, res->sr_slot); + nfs4_check_drain_fc_complete(res->sr_session); spin_unlock(&tbl->slot_tbl_lock); - res->sr_slotid = NFS4_MAX_SLOT_TABLE; + res->sr_slot = NULL; } static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { unsigned long timestamp; - struct nfs4_slot_table *tbl; - struct nfs4_slot *slot; struct nfs_client *clp; /* @@ -410,17 +423,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * res->sr_status = NFS_OK; /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ - if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) + if (!res->sr_slot) goto out; - tbl = &res->sr_session->fc_slot_table; - slot = tbl->slots + res->sr_slotid; - /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: /* Update the slot's sequence and clientid lease timer */ - ++slot->seq_nr; + ++res->sr_slot->seq_nr; timestamp = res->sr_renewal_time; clp = res->sr_session->clp; do_renew_lease(clp, timestamp); @@ -433,12 +443,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * * returned NFS4ERR_DELAY as per Section 2.10.6.2 * of RFC5661. */ - dprintk("%s: slot=%d seq=%d: Operation in progress\n", - __func__, res->sr_slotid, slot->seq_nr); + dprintk("%s: slot=%td seq=%d: Operation in progress\n", + __func__, + res->sr_slot - res->sr_session->fc_slot_table.slots, + res->sr_slot->seq_nr); goto out_retry; default: /* Just update the slot sequence no. */ - ++slot->seq_nr; + ++res->sr_slot->seq_nr; } out: /* The session may be reset by one of the error handlers. */ @@ -505,10 +517,9 @@ static int nfs41_setup_sequence(struct nfs4_session *session, dprintk("--> %s\n", __func__); /* slot already allocated? */ - if (res->sr_slotid != NFS4_MAX_SLOT_TABLE) + if (res->sr_slot != NULL) return 0; - res->sr_slotid = NFS4_MAX_SLOT_TABLE; tbl = &session->fc_slot_table; spin_lock(&tbl->slot_tbl_lock); @@ -550,7 +561,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session, dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); res->sr_session = session; - res->sr_slotid = slotid; + res->sr_slot = slot; res->sr_renewal_time = jiffies; res->sr_status_flags = 0; /* @@ -576,8 +587,9 @@ int nfs4_setup_sequence(const struct nfs_server *server, goto out; } - dprintk("--> %s clp %p session %p sr_slotid %d\n", - __func__, session->clp, session, res->sr_slotid); + dprintk("--> %s clp %p session %p sr_slot %td\n", + __func__, session->clp, session, res->sr_slot ? + res->sr_slot - session->fc_slot_table.slots : -1); ret = nfs41_setup_sequence(session, args, res, cache_reply, task); @@ -650,7 +662,7 @@ static int nfs4_call_sync_sequence(struct nfs_server *server, .callback_data = &data }; - res->sr_slotid = NFS4_MAX_SLOT_TABLE; + res->sr_slot = NULL; if (privileged) task_setup.callback_ops = &nfs41_call_priv_sync_ops; task = rpc_run_task(&task_setup); @@ -735,7 +747,6 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) p->o_res.server = p->o_arg.server; nfs_fattr_init(&p->f_attr); nfs_fattr_init(&p->dir_attr); - p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; } static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, @@ -1120,6 +1131,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * clear_bit(NFS_DELEGATED_STATE, &state->flags); smp_rmb(); if (state->n_rdwr != 0) { + clear_bit(NFS_O_RDWR_STATE, &state->flags); ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); if (ret != 0) return ret; @@ -1127,6 +1139,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * return -ESTALE; } if (state->n_wronly != 0) { + clear_bit(NFS_O_WRONLY_STATE, &state->flags); ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); if (ret != 0) return ret; @@ -1134,6 +1147,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * return -ESTALE; } if (state->n_rdonly != 0) { + clear_bit(NFS_O_RDONLY_STATE, &state->flags); ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); if (ret != 0) return ret; @@ -1188,7 +1202,7 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state int err; do { err = _nfs4_do_open_reclaim(ctx, state); - if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) + if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); } while (exception.retry); @@ -1258,6 +1272,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: nfs4_state_mark_reclaim_nograce(server->nfs_client, state); + case -EKEYEXPIRED: + /* + * User RPCSEC_GSS context has expired. + * We cannot recover this stateid now, so + * skip it and allow recovery thread to + * proceed. + */ case -ENOMEM: err = 0; goto out; @@ -1605,7 +1626,6 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state goto out; case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: - case -EKEYEXPIRED: nfs4_handle_exception(server, err, &exception); err = 0; } @@ -1820,6 +1840,8 @@ struct nfs4_closedata { struct nfs_closeres res; struct nfs_fattr fattr; unsigned long timestamp; + bool roc; + u32 roc_barrier; }; static void nfs4_free_closedata(void *data) @@ -1827,6 +1849,8 @@ static void nfs4_free_closedata(void *data) struct nfs4_closedata *calldata = data; struct nfs4_state_owner *sp = calldata->state->owner; + if (calldata->roc) + pnfs_roc_release(calldata->state->inode); nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); @@ -1859,6 +1883,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data) */ switch (task->tk_status) { case 0: + if (calldata->roc) + pnfs_roc_set_barrier(state->inode, + calldata->roc_barrier); nfs_set_open_stateid(state, &calldata->res.stateid, 0); renew_lease(server, calldata->timestamp); nfs4_close_clear_stateid_flags(state, @@ -1911,8 +1938,15 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) return; } - if (calldata->arg.fmode == 0) + if (calldata->arg.fmode == 0) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; + if (calldata->roc && + pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) { + rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq, + task, NULL); + return; + } + } nfs_fattr_init(calldata->res.fattr); calldata->timestamp = jiffies; @@ -1940,7 +1974,7 @@ static const struct rpc_call_ops nfs4_close_ops = { * * NOTE: Caller must be holding the sp->so_owner semaphore! */ -int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait) +int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_closedata *calldata; @@ -1975,12 +2009,12 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; - calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + calldata->roc = roc; path_get(path); calldata->path = *path; - msg.rpc_argp = &calldata->arg, - msg.rpc_resp = &calldata->res, + msg.rpc_argp = &calldata->arg; + msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) @@ -1993,125 +2027,24 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i out_free_calldata: kfree(calldata); out: + if (roc) + pnfs_roc_release(state->inode); nfs4_put_open_state(state); nfs4_put_state_owner(sp); return status; } -static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode) -{ - struct file *filp; - int ret; - - /* If the open_intent is for execute, we have an extra check to make */ - if (fmode & FMODE_EXEC) { - ret = nfs_may_open(state->inode, - state->owner->so_cred, - nd->intent.open.flags); - if (ret < 0) - goto out_close; - } - filp = lookup_instantiate_filp(nd, path->dentry, NULL); - if (!IS_ERR(filp)) { - struct nfs_open_context *ctx; - ctx = nfs_file_open_context(filp); - ctx->state = state; - return 0; - } - ret = PTR_ERR(filp); -out_close: - nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE)); - return ret; -} - -struct dentry * -nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct inode * +nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) { - struct path path = { - .mnt = nd->path.mnt, - .dentry = dentry, - }; - struct dentry *parent; - struct iattr attr; - struct rpc_cred *cred; struct nfs4_state *state; - struct dentry *res; - int open_flags = nd->intent.open.flags; - fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); - - if (nd->flags & LOOKUP_CREATE) { - attr.ia_mode = nd->intent.open.create_mode; - attr.ia_valid = ATTR_MODE; - if (!IS_POSIXACL(dir)) - attr.ia_mode &= ~current_umask(); - } else { - open_flags &= ~O_EXCL; - attr.ia_valid = 0; - BUG_ON(open_flags & O_CREAT); - } - cred = rpc_lookup_cred(); - if (IS_ERR(cred)) - return (struct dentry *)cred; - parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ - nfs_block_sillyrename(parent); - state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred); - put_rpccred(cred); - if (IS_ERR(state)) { - if (PTR_ERR(state) == -ENOENT) { - d_add(dentry, NULL); - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - } - nfs_unblock_sillyrename(parent); - return (struct dentry *)state; - } - res = d_add_unique(dentry, igrab(state->inode)); - if (res != NULL) - path.dentry = res; - nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir)); - nfs_unblock_sillyrename(parent); - nfs4_intent_set_file(nd, &path, state, fmode); - return res; -} - -int -nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) -{ - struct path path = { - .mnt = nd->path.mnt, - .dentry = dentry, - }; - struct rpc_cred *cred; - struct nfs4_state *state; - fmode_t fmode = openflags & (FMODE_READ | FMODE_WRITE); - - cred = rpc_lookup_cred(); - if (IS_ERR(cred)) - return PTR_ERR(cred); - state = nfs4_do_open(dir, &path, fmode, openflags, NULL, cred); - put_rpccred(cred); - if (IS_ERR(state)) { - switch (PTR_ERR(state)) { - case -EPERM: - case -EACCES: - case -EDQUOT: - case -ENOSPC: - case -EROFS: - return PTR_ERR(state); - default: - goto out_drop; - } - } - if (state->inode == dentry->d_inode) { - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - nfs4_intent_set_file(nd, &path, state, fmode); - return 1; - } - nfs4_close_sync(&path, state, fmode); -out_drop: - d_drop(dentry); - return 0; + state = nfs4_do_open(dir, &ctx->path, ctx->mode, open_flags, attr, ctx->cred); + if (IS_ERR(state)) + return ERR_CAST(state); + ctx->state = state; + return igrab(state->inode); } static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) @@ -2568,36 +2501,35 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page, static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nameidata *nd) + int flags, struct nfs_open_context *ctx) { - struct path path = { - .mnt = nd->path.mnt, + struct path my_path = { .dentry = dentry, }; + struct path *path = &my_path; struct nfs4_state *state; - struct rpc_cred *cred; - fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE); + struct rpc_cred *cred = NULL; + fmode_t fmode = 0; int status = 0; - cred = rpc_lookup_cred(); - if (IS_ERR(cred)) { - status = PTR_ERR(cred); - goto out; + if (ctx != NULL) { + cred = ctx->cred; + path = &ctx->path; + fmode = ctx->mode; } - state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred); + sattr->ia_mode &= ~current_umask(); + state = nfs4_do_open(dir, path, fmode, flags, sattr, cred); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); - goto out_putcred; + goto out; } d_add(dentry, igrab(state->inode)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) - status = nfs4_intent_set_file(nd, &path, state, fmode); + if (ctx != NULL) + ctx->state = state; else - nfs4_close_sync(&path, state, fmode); -out_putcred: - put_rpccred(cred); + nfs4_close_sync(path, state, fmode); out: return status; } @@ -2655,6 +2587,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) args->bitmask = server->cache_consistency_bitmask; res->server = server; + res->seq_res.sr_slot = NULL; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; } @@ -2671,18 +2604,46 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) return 1; } +static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) +{ + struct nfs_server *server = NFS_SERVER(dir); + struct nfs_renameargs *arg = msg->rpc_argp; + struct nfs_renameres *res = msg->rpc_resp; + + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; + arg->bitmask = server->attr_bitmask; + res->server = server; +} + +static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, + struct inode *new_dir) +{ + struct nfs_renameres *res = task->tk_msg.rpc_resp; + + if (!nfs4_sequence_done(task, &res->seq_res)) + return 0; + if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) + return 0; + + update_changeattr(old_dir, &res->old_cinfo); + nfs_post_op_update_inode(old_dir, res->old_fattr); + update_changeattr(new_dir, &res->new_cinfo); + nfs_post_op_update_inode(new_dir, res->new_fattr); + return 1; +} + static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, struct inode *new_dir, struct qstr *new_name) { struct nfs_server *server = NFS_SERVER(old_dir); - struct nfs4_rename_arg arg = { + struct nfs_renameargs arg = { .old_dir = NFS_FH(old_dir), .new_dir = NFS_FH(new_dir), .old_name = old_name, .new_name = new_name, .bitmask = server->attr_bitmask, }; - struct nfs4_rename_res res = { + struct nfs_renameres res = { .server = server, }; struct rpc_message msg = { @@ -2887,6 +2848,8 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, { struct nfs4_exception exception = { }; int err; + + sattr->ia_mode &= ~current_umask(); do { err = nfs4_handle_exception(NFS_SERVER(dir), _nfs4_proc_mkdir(dir, dentry, sattr), @@ -2896,15 +2859,16 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, } static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, - u64 cookie, struct page *page, unsigned int count, int plus) + u64 cookie, struct page **pages, unsigned int count, int plus) { struct inode *dir = dentry->d_inode; struct nfs4_readdir_arg args = { .fh = NFS_FH(dir), - .pages = &page, + .pages = pages, .pgbase = 0, .count = count, .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, + .plus = plus, }; struct nfs4_readdir_res res; struct rpc_message msg = { @@ -2922,8 +2886,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); res.pgbase = args.pgbase; status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); - if (status == 0) + if (status >= 0) { memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); + status += args.pgbase; + } nfs_invalidate_atime(dir); @@ -2932,14 +2898,14 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, } static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, - u64 cookie, struct page *page, unsigned int count, int plus) + u64 cookie, struct page **pages, unsigned int count, int plus) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), _nfs4_proc_readdir(dentry, cred, cookie, - page, count, plus), + pages, count, plus), &exception); } while (exception.retry); return err; @@ -2984,6 +2950,8 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, { struct nfs4_exception exception = { }; int err; + + sattr->ia_mode &= ~current_umask(); do { err = nfs4_handle_exception(NFS_SERVER(dir), _nfs4_proc_mknod(dir, dentry, sattr, rdev), @@ -3429,6 +3397,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) ret = nfs_revalidate_inode(server, inode); if (ret < 0) return ret; + if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); ret = nfs4_read_cached_acl(inode, buf, buflen); if (ret != -ENOENT) return ret; @@ -3457,6 +3427,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); ret = nfs4_call_sync(server, &msg, &arg, &res, 1); + /* + * Acl update can result in inode attribute update. + * so mark the attribute cache invalid. + */ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; + spin_unlock(&inode->i_lock); nfs_access_zap_cache(inode); nfs_zap_acl_cache(inode); return ret; @@ -3490,9 +3467,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, nfs4_state_mark_reclaim_nograce(clp, state); goto do_state_recovery; case -NFS4ERR_STALE_STATEID: - if (state == NULL) - break; - nfs4_state_mark_reclaim_reboot(clp, state); case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_EXPIRED: goto do_state_recovery; @@ -3540,6 +3514,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, struct nfs4_setclientid setclientid = { .sc_verifier = &sc_verifier, .sc_prog = program, + .sc_cb_ident = clp->cl_cb_ident, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], @@ -3579,7 +3554,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, if (signalled()) break; if (loop++ & 1) - ssleep(clp->cl_lease_time + 1); + ssleep(clp->cl_lease_time / HZ + 1); else if (++clp->cl_id_uniquifier == 0) break; @@ -3626,7 +3601,6 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, case -NFS4ERR_RESOURCE: /* The IBM lawyers misread another document! */ case -NFS4ERR_DELAY: - case -EKEYEXPIRED: err = nfs4_delay(clp->cl_rpcclient, &timeout); } } while (err == 0); @@ -3721,14 +3695,13 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co memcpy(&data->stateid, stateid, sizeof(data->stateid)); data->res.fattr = &data->fattr; data->res.server = server; - data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; task_setup_data.callback_data = data; - msg.rpc_argp = &data->args, - msg.rpc_resp = &data->res, + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -3807,6 +3780,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock goto out; lsp = request->fl_u.nfs4_fl.owner; arg.lock_owner.id = lsp->ls_id.id; + arg.lock_owner.s_dev = server->s_dev; status = nfs4_call_sync(server, &msg, &arg, &res, 1); switch (status) { case 0: @@ -3874,7 +3848,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, p->arg.fl = &p->fl; p->arg.seqid = seqid; p->res.seqid = seqid; - p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; p->arg.stateid = &lsp->ls_stateid; p->lsp = lsp; atomic_inc(&lsp->ls_count); @@ -3973,8 +3946,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, return ERR_PTR(-ENOMEM); } - msg.rpc_argp = &data->arg, - msg.rpc_resp = &data->res, + msg.rpc_argp = &data->arg; + msg.rpc_resp = &data->res; task_setup_data.callback_data = data; return rpc_run_task(&task_setup_data); } @@ -4053,8 +4026,8 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->arg.lock_stateid = &lsp->ls_stateid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; p->arg.lock_owner.id = lsp->ls_id.id; + p->arg.lock_owner.s_dev = server->s_dev; p->res.lock_seqid = p->arg.lock_seqid; - p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; p->lsp = lsp; p->server = server; atomic_inc(&lsp->ls_count); @@ -4211,8 +4184,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.reclaim = NFS_LOCK_RECLAIM; task_setup_data.callback_ops = &nfs4_recover_lock_ops; } - msg.rpc_argp = &data->arg, - msg.rpc_resp = &data->res, + msg.rpc_argp = &data->arg; + msg.rpc_resp = &data->res; task_setup_data.callback_data = data; task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) @@ -4241,7 +4214,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) return 0; err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); - if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) + if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); } while (exception.retry); @@ -4266,7 +4239,6 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request goto out; case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: - case -EKEYEXPIRED: nfs4_handle_exception(server, err, &exception); err = 0; } @@ -4412,13 +4384,21 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) nfs4_state_mark_reclaim_nograce(server->nfs_client, state); err = 0; goto out; + case -EKEYEXPIRED: + /* + * User RPCSEC_GSS context has expired. + * We cannot recover this stateid now, so + * skip it and allow recovery thread to + * proceed. + */ + err = 0; + goto out; case -ENOMEM: case -NFS4ERR_DENIED: /* kill_proc(fl->fl_pid, SIGLOST, 1); */ err = 0; goto out; case -NFS4ERR_DELAY: - case -EKEYEXPIRED: break; } err = nfs4_handle_exception(server, err, &exception); @@ -4451,48 +4431,43 @@ void nfs4_release_lockowner(const struct nfs4_lock_state *lsp) return; args->lock_owner.clientid = server->nfs_client->cl_clientid; args->lock_owner.id = lsp->ls_id.id; + args->lock_owner.s_dev = server->s_dev; msg.rpc_argp = args; rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args); } #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" -int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, - size_t buflen, int flags) +static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key, + const void *buf, size_t buflen, + int flags, int type) { - struct inode *inode = dentry->d_inode; - - if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) - return -EOPNOTSUPP; + if (strcmp(key, "") != 0) + return -EINVAL; - return nfs4_proc_set_acl(inode, buf, buflen); + return nfs4_proc_set_acl(dentry->d_inode, buf, buflen); } -/* The getxattr man page suggests returning -ENODATA for unknown attributes, - * and that's what we'll do for e.g. user attributes that haven't been set. - * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported - * attributes in kernel-managed attribute namespaces. */ -ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf, - size_t buflen) +static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key, + void *buf, size_t buflen, int type) { - struct inode *inode = dentry->d_inode; - - if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) - return -EOPNOTSUPP; + if (strcmp(key, "") != 0) + return -EINVAL; - return nfs4_proc_get_acl(inode, buf, buflen); + return nfs4_proc_get_acl(dentry->d_inode, buf, buflen); } -ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) +static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list, + size_t list_len, const char *name, + size_t name_len, int type) { - size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; + size_t len = sizeof(XATTR_NAME_NFSV4_ACL); if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode))) return 0; - if (buf && buflen < len) - return -ERANGE; - if (buf) - memcpy(buf, XATTR_NAME_NFSV4_ACL, len); + + if (list && len <= list_len) + memcpy(list, XATTR_NAME_NFSV4_ACL, len); return len; } @@ -4545,6 +4520,25 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, #ifdef CONFIG_NFS_V4_1 /* + * Check the exchange flags returned by the server for invalid flags, having + * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or + * DS flags set. + */ +static int nfs4_check_cl_exchange_flags(u32 flags) +{ + if (flags & ~EXCHGID4_FLAG_MASK_R) + goto out_inval; + if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) && + (flags & EXCHGID4_FLAG_USE_NON_PNFS)) + goto out_inval; + if (!(flags & (EXCHGID4_FLAG_MASK_PNFS))) + goto out_inval; + return NFS_OK; +out_inval: + return -NFS4ERR_INVAL; +} + +/* * nfs4_proc_exchange_id() * * Since the clientid has expired, all compounds using sessions @@ -4557,7 +4551,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) nfs4_verifier verifier; struct nfs41_exchange_id_args args = { .client = clp, - .flags = clp->cl_exchange_flags, + .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, }; struct nfs41_exchange_id_res res = { .client = clp, @@ -4574,34 +4568,21 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) dprintk("--> %s\n", __func__); BUG_ON(clp == NULL); - /* Remove server-only flags */ - args.flags &= ~EXCHGID4_FLAG_CONFIRMED_R; - p = (u32 *)verifier.data; *p++ = htonl((u32)clp->cl_boot_time.tv_sec); *p = htonl((u32)clp->cl_boot_time.tv_nsec); args.verifier = &verifier; - while (1) { - args.id_len = scnprintf(args.id, sizeof(args.id), - "%s/%s %u", - clp->cl_ipaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR), - clp->cl_id_uniquifier); - - status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); - - if (status != -NFS4ERR_CLID_INUSE) - break; - - if (signalled()) - break; - - if (++clp->cl_id_uniquifier == 0) - break; - } + args.id_len = scnprintf(args.id, sizeof(args.id), + "%s/%s.%s/%u", + clp->cl_ipaddr, + init_utsname()->nodename, + init_utsname()->domainname, + clp->cl_rpcclient->cl_auth->au_flavor); + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + if (!status) + status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); dprintk("<-- %s status= %d\n", __func__, status); return status; } @@ -4647,7 +4628,6 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: - case -EKEYEXPIRED: dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); rpc_delay(task, NFS4_POLL_RETRY_MIN); task->tk_status = 0; @@ -4687,7 +4667,6 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) }; int status; - res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; dprintk("--> %s\n", __func__); task = rpc_run_task(&task_setup); @@ -4837,17 +4816,17 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) if (!session) return NULL; - init_completion(&session->complete); - tbl = &session->fc_slot_table; tbl->highest_used_slotid = -1; spin_lock_init(&tbl->slot_tbl_lock); rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); + init_completion(&tbl->complete); tbl = &session->bc_slot_table; tbl->highest_used_slotid = -1; spin_lock_init(&tbl->slot_tbl_lock); rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); + init_completion(&tbl->complete); session->session_state = 1<<NFS4_SESSION_INITING; @@ -4914,49 +4893,56 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) args->bc_attrs.max_reqs); } -static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd) +static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session) { - if (rcvd <= sent) - return 0; - printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. " - "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd); - return -EINVAL; + struct nfs4_channel_attrs *sent = &args->fc_attrs; + struct nfs4_channel_attrs *rcvd = &session->fc_attrs; + + if (rcvd->headerpadsz > sent->headerpadsz) + return -EINVAL; + if (rcvd->max_resp_sz > sent->max_resp_sz) + return -EINVAL; + /* + * Our requested max_ops is the minimum we need; we're not + * prepared to break up compounds into smaller pieces than that. + * So, no point even trying to continue if the server won't + * cooperate: + */ + if (rcvd->max_ops < sent->max_ops) + return -EINVAL; + if (rcvd->max_reqs == 0) + return -EINVAL; + return 0; } -#define _verify_fore_channel_attr(_name_) \ - _verify_channel_attr("fore", #_name_, \ - args->fc_attrs._name_, \ - session->fc_attrs._name_) +static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session) +{ + struct nfs4_channel_attrs *sent = &args->bc_attrs; + struct nfs4_channel_attrs *rcvd = &session->bc_attrs; -#define _verify_back_channel_attr(_name_) \ - _verify_channel_attr("back", #_name_, \ - args->bc_attrs._name_, \ - session->bc_attrs._name_) + if (rcvd->max_rqst_sz > sent->max_rqst_sz) + return -EINVAL; + if (rcvd->max_resp_sz < sent->max_resp_sz) + return -EINVAL; + if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) + return -EINVAL; + /* These would render the backchannel useless: */ + if (rcvd->max_ops == 0) + return -EINVAL; + if (rcvd->max_reqs == 0) + return -EINVAL; + return 0; +} -/* - * The server is not allowed to increase the fore channel header pad size, - * maximum response size, or maximum number of operations. - * - * The back channel attributes are only negotiatied down: We send what the - * (back channel) server insists upon. - */ static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session) { - int ret = 0; - - ret |= _verify_fore_channel_attr(headerpadsz); - ret |= _verify_fore_channel_attr(max_resp_sz); - ret |= _verify_fore_channel_attr(max_ops); - - ret |= _verify_back_channel_attr(headerpadsz); - ret |= _verify_back_channel_attr(max_rqst_sz); - ret |= _verify_back_channel_attr(max_resp_sz); - ret |= _verify_back_channel_attr(max_resp_sz_cached); - ret |= _verify_back_channel_attr(max_ops); - ret |= _verify_back_channel_attr(max_reqs); + int ret; - return ret; + ret = nfs4_verify_fore_channel_attrs(args, session); + if (ret) + return ret; + return nfs4_verify_back_channel_attrs(args, session); } static int _nfs4_proc_create_session(struct nfs_client *clp) @@ -5111,7 +5097,6 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client { switch(task->tk_status) { case -NFS4ERR_DELAY: - case -EKEYEXPIRED: rpc_delay(task, NFS4_POLL_RETRY_MAX); return -EAGAIN; default: @@ -5180,12 +5165,11 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ if (!atomic_inc_not_zero(&clp->cl_count)) return ERR_PTR(-EIO); - calldata = kmalloc(sizeof(*calldata), GFP_NOFS); + calldata = kzalloc(sizeof(*calldata), GFP_NOFS); if (calldata == NULL) { nfs_put_client(clp); return ERR_PTR(-ENOMEM); } - calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE; msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; calldata->clp = clp; @@ -5254,7 +5238,6 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf case -NFS4ERR_WRONG_CRED: /* What to do here? */ break; case -NFS4ERR_DELAY: - case -EKEYEXPIRED: rpc_delay(task, NFS4_POLL_RETRY_MAX); return -EAGAIN; default: @@ -5317,7 +5300,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) goto out; calldata->clp = clp; calldata->arg.one_fs = 0; - calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; @@ -5333,6 +5315,152 @@ out: dprintk("<-- %s status=%d\n", __func__, status); return status; } + +static void +nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutget *lgp = calldata; + struct nfs_server *server = NFS_SERVER(lgp->args.inode); + + dprintk("--> %s\n", __func__); + /* Note the is a race here, where a CB_LAYOUTRECALL can come in + * right now covering the LAYOUTGET we are about to send. + * However, that is not so catastrophic, and there seems + * to be no way to prevent it completely. + */ + if (nfs4_setup_sequence(server, &lgp->args.seq_args, + &lgp->res.seq_res, 0, task)) + return; + if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, + NFS_I(lgp->args.inode)->layout, + lgp->args.ctx->state)) { + rpc_exit(task, NFS4_OK); + return; + } + rpc_call_start(task); +} + +static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutget *lgp = calldata; + struct nfs_server *server = NFS_SERVER(lgp->args.inode); + + dprintk("--> %s\n", __func__); + + if (!nfs4_sequence_done(task, &lgp->res.seq_res)) + return; + + switch (task->tk_status) { + case 0: + break; + case -NFS4ERR_LAYOUTTRYLATER: + case -NFS4ERR_RECALLCONFLICT: + task->tk_status = -NFS4ERR_DELAY; + /* Fall through */ + default: + if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { + rpc_restart_call_prepare(task); + return; + } + } + dprintk("<-- %s\n", __func__); +} + +static void nfs4_layoutget_release(void *calldata) +{ + struct nfs4_layoutget *lgp = calldata; + + dprintk("--> %s\n", __func__); + if (lgp->res.layout.buf != NULL) + free_page((unsigned long) lgp->res.layout.buf); + put_nfs_open_context(lgp->args.ctx); + kfree(calldata); + dprintk("<-- %s\n", __func__); +} + +static const struct rpc_call_ops nfs4_layoutget_call_ops = { + .rpc_call_prepare = nfs4_layoutget_prepare, + .rpc_call_done = nfs4_layoutget_done, + .rpc_release = nfs4_layoutget_release, +}; + +int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) +{ + struct nfs_server *server = NFS_SERVER(lgp->args.inode); + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], + .rpc_argp = &lgp->args, + .rpc_resp = &lgp->res, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs4_layoutget_call_ops, + .callback_data = lgp, + .flags = RPC_TASK_ASYNC, + }; + int status = 0; + + dprintk("--> %s\n", __func__); + + lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS); + if (lgp->res.layout.buf == NULL) { + nfs4_layoutget_release(lgp); + return -ENOMEM; + } + + lgp->res.seq_res.sr_slot = NULL; + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + status = nfs4_wait_for_completion_rpc_task(task); + if (status == 0) + status = task->tk_status; + if (status == 0) + status = pnfs_layout_process(lgp); + rpc_put_task(task); + dprintk("<-- %s status=%d\n", __func__, status); + return status; +} + +static int +_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) +{ + struct nfs4_getdeviceinfo_args args = { + .pdev = pdev, + }; + struct nfs4_getdeviceinfo_res res = { + .pdev = pdev, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int status; + + dprintk("--> %s\n", __func__); + status = nfs4_call_sync(server, &msg, &args, &res, 0); + dprintk("<-- %s status=%d\n", __func__, status); + + return status; +} + +int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(server, + _nfs4_proc_getdeviceinfo(server, pdev), + &exception); + } while (exception.retry); + return err; +} +EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); + #endif /* CONFIG_NFS_V4_1 */ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { @@ -5421,9 +5549,10 @@ static const struct inode_operations nfs4_file_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, - .getxattr = nfs4_getxattr, - .setxattr = nfs4_setxattr, - .listxattr = nfs4_listxattr, + .getxattr = generic_getxattr, + .setxattr = generic_setxattr, + .listxattr = generic_listxattr, + .removexattr = generic_removexattr, }; const struct nfs_rpc_ops nfs_v4_clientops = { @@ -5443,6 +5572,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .unlink_setup = nfs4_proc_unlink_setup, .unlink_done = nfs4_proc_unlink_done, .rename = nfs4_proc_rename, + .rename_setup = nfs4_proc_rename_setup, + .rename_done = nfs4_proc_rename_done, .link = nfs4_proc_link, .symlink = nfs4_proc_symlink, .mkdir = nfs4_proc_mkdir, @@ -5463,6 +5594,19 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, .close_context = nfs4_close_context, + .open_context = nfs4_atomic_open, +}; + +static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { + .prefix = XATTR_NAME_NFSV4_ACL, + .list = nfs4_xattr_list_nfs4_acl, + .get = nfs4_xattr_get_nfs4_acl, + .set = nfs4_xattr_set_nfs4_acl, +}; + +const struct xattr_handler *nfs4_xattr_handlers[] = { + &nfs4_xattr_nfs4_acl_handler, + NULL }; /* diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 72b6c580af1..402143d75fc 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -63,9 +63,14 @@ nfs4_renew_state(struct work_struct *work) ops = clp->cl_mvops->state_renewal_ops; dprintk("%s: start\n", __func__); - /* Are there any active superblocks? */ - if (list_empty(&clp->cl_superblocks)) + + rcu_read_lock(); + if (list_empty(&clp->cl_superblocks)) { + rcu_read_unlock(); goto out; + } + rcu_read_unlock(); + spin_lock(&clp->cl_lock); lease = clp->cl_lease_time; last = clp->cl_last_renewal; @@ -75,7 +80,7 @@ nfs4_renew_state(struct work_struct *work) cred = ops->get_state_renewal_cred_locked(clp); spin_unlock(&clp->cl_lock); if (cred == NULL) { - if (list_empty(&clp->cl_delegations)) { + if (!nfs_delegations_present(clp)) { set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); goto out; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3e2f19b04c0..e6742b57a04 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -40,12 +40,13 @@ #include <linux/kernel.h> #include <linux/slab.h> -#include <linux/smp_lock.h> +#include <linux/fs.h> #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> #include <linux/kthread.h> #include <linux/module.h> #include <linux/random.h> +#include <linux/ratelimit.h> #include <linux/workqueue.h> #include <linux/bitops.h> @@ -53,6 +54,7 @@ #include "callback.h" #include "delegation.h" #include "internal.h" +#include "pnfs.h" #define OPENOWNER_POOL_SIZE 8 @@ -103,14 +105,17 @@ static void nfs4_clear_machine_cred(struct nfs_client *clp) put_rpccred(cred); } -struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) +static struct rpc_cred * +nfs4_get_renew_cred_server_locked(struct nfs_server *server) { + struct rpc_cred *cred = NULL; struct nfs4_state_owner *sp; struct rb_node *pos; - struct rpc_cred *cred = NULL; - for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { - sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); + for (pos = rb_first(&server->state_owners); + pos != NULL; + pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_server_node); if (list_empty(&sp->so_states)) continue; cred = get_rpccred(sp->so_cred); @@ -119,6 +124,28 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) return cred; } +/** + * nfs4_get_renew_cred_locked - Acquire credential for a renew operation + * @clp: client state handle + * + * Returns an rpc_cred with reference count bumped, or NULL. + * Caller must hold clp->cl_lock. + */ +struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) +{ + struct rpc_cred *cred = NULL; + struct nfs_server *server; + + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + cred = nfs4_get_renew_cred_server_locked(server); + if (cred != NULL) + break; + } + rcu_read_unlock(); + return cred; +} + #if defined(CONFIG_NFS_V4_1) static int nfs41_setup_state_renewal(struct nfs_client *clp) @@ -140,6 +167,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp) return status; } +/* + * Back channel returns NFS4ERR_DELAY for new requests when + * NFS4_SESSION_DRAINING is set so there is no work to be done when draining + * is ended. + */ static void nfs4_end_drain_session(struct nfs_client *clp) { struct nfs4_session *ses = clp->cl_session; @@ -163,22 +195,32 @@ static void nfs4_end_drain_session(struct nfs_client *clp) } } -static int nfs4_begin_drain_session(struct nfs_client *clp) +static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl) { - struct nfs4_session *ses = clp->cl_session; - struct nfs4_slot_table *tbl = &ses->fc_slot_table; - spin_lock(&tbl->slot_tbl_lock); - set_bit(NFS4_SESSION_DRAINING, &ses->session_state); if (tbl->highest_used_slotid != -1) { - INIT_COMPLETION(ses->complete); + INIT_COMPLETION(tbl->complete); spin_unlock(&tbl->slot_tbl_lock); - return wait_for_completion_interruptible(&ses->complete); + return wait_for_completion_interruptible(&tbl->complete); } spin_unlock(&tbl->slot_tbl_lock); return 0; } +static int nfs4_begin_drain_session(struct nfs_client *clp) +{ + struct nfs4_session *ses = clp->cl_session; + int ret = 0; + + set_bit(NFS4_SESSION_DRAINING, &ses->session_state); + /* back channel */ + ret = nfs4_wait_on_slot_tbl(&ses->bc_slot_table); + if (ret) + return ret; + /* fore channel */ + return nfs4_wait_on_slot_tbl(&ses->fc_slot_table); +} + int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { int status; @@ -208,28 +250,56 @@ struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) #endif /* CONFIG_NFS_V4_1 */ -struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) +static struct rpc_cred * +nfs4_get_setclientid_cred_server(struct nfs_server *server) { + struct nfs_client *clp = server->nfs_client; + struct rpc_cred *cred = NULL; struct nfs4_state_owner *sp; struct rb_node *pos; + + spin_lock(&clp->cl_lock); + pos = rb_first(&server->state_owners); + if (pos != NULL) { + sp = rb_entry(pos, struct nfs4_state_owner, so_server_node); + cred = get_rpccred(sp->so_cred); + } + spin_unlock(&clp->cl_lock); + return cred; +} + +/** + * nfs4_get_setclientid_cred - Acquire credential for a setclientid operation + * @clp: client state handle + * + * Returns an rpc_cred with reference count bumped, or NULL. + */ +struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) +{ + struct nfs_server *server; struct rpc_cred *cred; spin_lock(&clp->cl_lock); cred = nfs4_get_machine_cred_locked(clp); + spin_unlock(&clp->cl_lock); if (cred != NULL) goto out; - pos = rb_first(&clp->cl_state_owners); - if (pos != NULL) { - sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); - cred = get_rpccred(sp->so_cred); + + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + cred = nfs4_get_setclientid_cred_server(server); + if (cred != NULL) + break; } + rcu_read_unlock(); + out: - spin_unlock(&clp->cl_lock); return cred; } -static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new, - __u64 minval, int maxbits) +static void nfs_alloc_unique_id_locked(struct rb_root *root, + struct nfs_unique_id *new, + __u64 minval, int maxbits) { struct rb_node **p, *parent; struct nfs_unique_id *pos; @@ -284,16 +354,15 @@ static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id) } static struct nfs4_state_owner * -nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred) +nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred) { - struct nfs_client *clp = server->nfs_client; - struct rb_node **p = &clp->cl_state_owners.rb_node, + struct rb_node **p = &server->state_owners.rb_node, *parent = NULL; struct nfs4_state_owner *sp, *res = NULL; while (*p != NULL) { parent = *p; - sp = rb_entry(parent, struct nfs4_state_owner, so_client_node); + sp = rb_entry(parent, struct nfs4_state_owner, so_server_node); if (server < sp->so_server) { p = &parent->rb_left; @@ -317,24 +386,17 @@ nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred) } static struct nfs4_state_owner * -nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new) +nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) { - struct rb_node **p = &clp->cl_state_owners.rb_node, + struct nfs_server *server = new->so_server; + struct rb_node **p = &server->state_owners.rb_node, *parent = NULL; struct nfs4_state_owner *sp; while (*p != NULL) { parent = *p; - sp = rb_entry(parent, struct nfs4_state_owner, so_client_node); + sp = rb_entry(parent, struct nfs4_state_owner, so_server_node); - if (new->so_server < sp->so_server) { - p = &parent->rb_left; - continue; - } - if (new->so_server > sp->so_server) { - p = &parent->rb_right; - continue; - } if (new->so_cred < sp->so_cred) p = &parent->rb_left; else if (new->so_cred > sp->so_cred) @@ -344,18 +406,21 @@ nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new) return sp; } } - nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64); - rb_link_node(&new->so_client_node, parent, p); - rb_insert_color(&new->so_client_node, &clp->cl_state_owners); + nfs_alloc_unique_id_locked(&server->openowner_id, + &new->so_owner_id, 1, 64); + rb_link_node(&new->so_server_node, parent, p); + rb_insert_color(&new->so_server_node, &server->state_owners); return new; } static void -nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp) +nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) { - if (!RB_EMPTY_NODE(&sp->so_client_node)) - rb_erase(&sp->so_client_node, &clp->cl_state_owners); - nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id); + struct nfs_server *server = sp->so_server; + + if (!RB_EMPTY_NODE(&sp->so_server_node)) + rb_erase(&sp->so_server_node, &server->state_owners); + nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id); } /* @@ -384,23 +449,32 @@ nfs4_alloc_state_owner(void) static void nfs4_drop_state_owner(struct nfs4_state_owner *sp) { - if (!RB_EMPTY_NODE(&sp->so_client_node)) { - struct nfs_client *clp = sp->so_server->nfs_client; + if (!RB_EMPTY_NODE(&sp->so_server_node)) { + struct nfs_server *server = sp->so_server; + struct nfs_client *clp = server->nfs_client; spin_lock(&clp->cl_lock); - rb_erase(&sp->so_client_node, &clp->cl_state_owners); - RB_CLEAR_NODE(&sp->so_client_node); + rb_erase(&sp->so_server_node, &server->state_owners); + RB_CLEAR_NODE(&sp->so_server_node); spin_unlock(&clp->cl_lock); } } -struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) +/** + * nfs4_get_state_owner - Look up a state owner given a credential + * @server: nfs_server to search + * @cred: RPC credential to match + * + * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL. + */ +struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, + struct rpc_cred *cred) { struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *new; spin_lock(&clp->cl_lock); - sp = nfs4_find_state_owner(server, cred); + sp = nfs4_find_state_owner_locked(server, cred); spin_unlock(&clp->cl_lock); if (sp != NULL) return sp; @@ -410,7 +484,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct new->so_server = server; new->so_cred = cred; spin_lock(&clp->cl_lock); - sp = nfs4_insert_state_owner(clp, new); + sp = nfs4_insert_state_owner_locked(new); spin_unlock(&clp->cl_lock); if (sp == new) get_rpccred(cred); @@ -421,6 +495,11 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct return sp; } +/** + * nfs4_put_state_owner - Release a nfs4_state_owner + * @sp: state owner data to release + * + */ void nfs4_put_state_owner(struct nfs4_state_owner *sp) { struct nfs_client *clp = sp->so_server->nfs_client; @@ -428,7 +507,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp) if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) return; - nfs4_remove_state_owner(clp, sp); + nfs4_remove_state_owner_locked(sp); spin_unlock(&clp->cl_lock); rpc_destroy_wait_queue(&sp->so_sequence.wait); put_rpccred(cred); @@ -583,8 +662,11 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, if (!call_close) { nfs4_put_open_state(state); nfs4_put_state_owner(owner); - } else - nfs4_do_close(path, state, gfp_mask, wait); + } else { + bool roc = pnfs_roc(state->inode); + + nfs4_do_close(path, state, gfp_mask, wait, roc); + } } void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode) @@ -631,7 +713,8 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) { struct nfs4_lock_state *lsp; - struct nfs_client *clp = state->owner->so_server->nfs_client; + struct nfs_server *server = state->owner->so_server; + struct nfs_client *clp = server->nfs_client; lsp = kzalloc(sizeof(*lsp), GFP_NOFS); if (lsp == NULL) @@ -655,7 +738,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f return NULL; } spin_lock(&clp->cl_lock); - nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64); + nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64); spin_unlock(&clp->cl_lock); INIT_LIST_HEAD(&lsp->ls_locks); return lsp; @@ -663,10 +746,11 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) { - struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client; + struct nfs_server *server = lsp->ls_state->owner->so_server; + struct nfs_client *clp = server->nfs_client; spin_lock(&clp->cl_lock); - nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); + nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id); spin_unlock(&clp->cl_lock); rpc_destroy_wait_queue(&lsp->ls_sequence.wait); kfree(lsp); @@ -970,13 +1054,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ /* Guard against delegation returns and new lock/unlock calls */ down_write(&nfsi->rwsem); /* Protect inode->i_flock using the BKL */ - lock_kernel(); + lock_flocks(); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; if (nfs_file_open_context(fl->fl_file)->state != state) continue; - unlock_kernel(); + unlock_flocks(); status = ops->recover_lock(state, fl); switch (status) { case 0: @@ -1003,9 +1087,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ /* kill_proc(fl->fl_pid, SIGLOST, 1); */ status = 0; } - lock_kernel(); + lock_flocks(); } - unlock_kernel(); + unlock_flocks(); out: up_write(&nfsi->rwsem); return status; @@ -1063,6 +1147,14 @@ restart: /* Mark the file as being 'closed' */ state->state = 0; break; + case -EKEYEXPIRED: + /* + * User RPCSEC_GSS context has expired. + * We cannot recover this stateid now, so + * skip it and allow recovery thread to + * proceed. + */ + break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_BAD_STATEID: @@ -1104,15 +1196,19 @@ static void nfs4_clear_open_state(struct nfs4_state *state) } } -static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state)) +static void nfs4_reset_seqids(struct nfs_server *server, + int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state)) { + struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp; struct rb_node *pos; struct nfs4_state *state; - /* Reset all sequence ids to zero */ - for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { - sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); + spin_lock(&clp->cl_lock); + for (pos = rb_first(&server->state_owners); + pos != NULL; + pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_server_node); sp->so_seqid.flags = 0; spin_lock(&sp->so_lock); list_for_each_entry(state, &sp->so_states, open_states) { @@ -1121,6 +1217,18 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_re } spin_unlock(&sp->so_lock); } + spin_unlock(&clp->cl_lock); +} + +static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, + int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state)) +{ + struct nfs_server *server; + + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) + nfs4_reset_seqids(server, mark_reclaim); + rcu_read_unlock(); } static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) @@ -1138,29 +1246,51 @@ static void nfs4_reclaim_complete(struct nfs_client *clp, (void)ops->reclaim_complete(clp); } -static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) +static void nfs4_clear_reclaim_server(struct nfs_server *server) { + struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp; struct rb_node *pos; struct nfs4_state *state; - if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) - return; - - nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops); - - for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { - sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); + spin_lock(&clp->cl_lock); + for (pos = rb_first(&server->state_owners); + pos != NULL; + pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_server_node); spin_lock(&sp->so_lock); list_for_each_entry(state, &sp->so_states, open_states) { - if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags)) + if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT, + &state->flags)) continue; nfs4_state_mark_reclaim_nograce(clp, state); } spin_unlock(&sp->so_lock); } + spin_unlock(&clp->cl_lock); +} + +static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp) +{ + struct nfs_server *server; + + if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) + return 0; + + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) + nfs4_clear_reclaim_server(server); + rcu_read_unlock(); nfs_delegation_reap_unclaimed(clp); + return 1; +} + +static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) +{ + if (!nfs4_state_clear_reclaim_reboot(clp)) + return; + nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops); } static void nfs_delegation_clear_all(struct nfs_client *clp) @@ -1175,6 +1305,14 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp) nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); } +static void nfs4_warn_keyexpired(const char *s) +{ + printk_ratelimited(KERN_WARNING "Error: state manager" + " encountered RPCSEC_GSS session" + " expired against NFSv4 server %s.\n", + s); +} + static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) { switch (error) { @@ -1187,7 +1325,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_LEASE_MOVED: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_end_reclaim_reboot(clp); + nfs4_state_clear_reclaim_reboot(clp); nfs4_state_start_reclaim_reboot(clp); break; case -NFS4ERR_EXPIRED: @@ -1204,33 +1342,50 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); /* Zero session reset errors */ return 0; + case -EKEYEXPIRED: + /* Nothing we can do */ + nfs4_warn_keyexpired(clp->cl_hostname); + return 0; } return error; } static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops) { + struct nfs4_state_owner *sp; + struct nfs_server *server; struct rb_node *pos; int status = 0; restart: - spin_lock(&clp->cl_lock); - for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { - struct nfs4_state_owner *sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); - if (!test_and_clear_bit(ops->owner_flag_bit, &sp->so_flags)) - continue; - atomic_inc(&sp->so_count); - spin_unlock(&clp->cl_lock); - status = nfs4_reclaim_open_state(sp, ops); - if (status < 0) { - set_bit(ops->owner_flag_bit, &sp->so_flags); + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + spin_lock(&clp->cl_lock); + for (pos = rb_first(&server->state_owners); + pos != NULL; + pos = rb_next(pos)) { + sp = rb_entry(pos, + struct nfs4_state_owner, so_server_node); + if (!test_and_clear_bit(ops->owner_flag_bit, + &sp->so_flags)) + continue; + atomic_inc(&sp->so_count); + spin_unlock(&clp->cl_lock); + rcu_read_unlock(); + + status = nfs4_reclaim_open_state(sp, ops); + if (status < 0) { + set_bit(ops->owner_flag_bit, &sp->so_flags); + nfs4_put_state_owner(sp); + return nfs4_recovery_handle_error(clp, status); + } + nfs4_put_state_owner(sp); - return nfs4_recovery_handle_error(clp, status); + goto restart; } - nfs4_put_state_owner(sp); - goto restart; + spin_unlock(&clp->cl_lock); } - spin_unlock(&clp->cl_lock); + rcu_read_unlock(); return status; } @@ -1414,9 +1569,10 @@ static void nfs4_set_lease_expired(struct nfs_client *clp, int status) case -NFS4ERR_DELAY: case -NFS4ERR_CLID_INUSE: case -EAGAIN: - case -EKEYEXPIRED: break; + case -EKEYEXPIRED: + nfs4_warn_keyexpired(clp->cl_hostname); case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery * in nfs4_exchange_id */ default: @@ -1447,6 +1603,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); + pnfs_destroy_all_layouts(clp); } if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 08ef9129113..4e2c168b6ee 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -52,6 +52,7 @@ #include <linux/nfs_idmap.h> #include "nfs4_fs.h" #include "internal.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -70,8 +71,8 @@ static int nfs4_stat_to_errno(int); /* lock,open owner id: * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) */ -#define open_owner_id_maxsz (1 + 4) -#define lock_owner_id_maxsz (1 + 4) +#define open_owner_id_maxsz (1 + 1 + 4) +#define lock_owner_id_maxsz (1 + 1 + 4) #define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) #define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) @@ -310,6 +311,19 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) +#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ + XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) +#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ + 1 /* layout type */ + \ + 1 /* opaque devaddr4 length */ + \ + /* devaddr4 payload is read into page */ \ + 1 /* notification bitmap length */ + \ + 1 /* notification bitmap */) +#define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ + encode_stateid_maxsz) +#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ + decode_stateid_maxsz + \ + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -699,6 +713,20 @@ static int nfs4_stat_to_errno(int); #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_reclaim_complete_maxsz) +#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz +\ + encode_getdeviceinfo_maxsz) +#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_getdeviceinfo_maxsz) +#define NFS4_enc_layoutget_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_layoutget_maxsz) +#define NFS4_dec_layoutget_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_layoutget_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -816,7 +844,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const if (iap->ia_valid & ATTR_MODE) len += 4; if (iap->ia_valid & ATTR_UID) { - owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); + owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ); if (owner_namelen < 0) { dprintk("nfs: couldn't resolve uid %d to string\n", iap->ia_uid); @@ -828,7 +856,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const len += 4 + (XDR_QUADLEN(owner_namelen) << 2); } if (iap->ia_valid & ATTR_GID) { - owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); + owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ); if (owner_grouplen < 0) { dprintk("nfs: couldn't resolve gid %d to string\n", iap->ia_gid); @@ -1060,10 +1088,11 @@ static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lo { __be32 *p; - p = reserve_space(xdr, 28); + p = reserve_space(xdr, 32); p = xdr_encode_hyper(p, lowner->clientid); - *p++ = cpu_to_be32(16); + *p++ = cpu_to_be32(20); p = xdr_encode_opaque_fixed(p, "lock id:", 8); + *p++ = cpu_to_be32(lowner->s_dev); xdr_encode_hyper(p, lowner->id); } @@ -1182,10 +1211,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena *p++ = cpu_to_be32(OP_OPEN); *p = cpu_to_be32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->fmode); - p = reserve_space(xdr, 28); + p = reserve_space(xdr, 32); p = xdr_encode_hyper(p, arg->clientid); - *p++ = cpu_to_be32(16); + *p++ = cpu_to_be32(20); p = xdr_encode_opaque_fixed(p, "open id:", 8); + *p++ = cpu_to_be32(arg->server->s_dev); xdr_encode_hyper(p, arg->id); } @@ -1385,24 +1415,35 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) { - uint32_t attrs[2] = { - FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, - FATTR4_WORD1_MOUNTED_ON_FILEID, - }; + uint32_t attrs[2] = {0, 0}; + uint32_t dircount = readdir->count >> 1; __be32 *p; + if (readdir->plus) { + attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| + FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE; + attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER| + FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| + FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| + FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; + dircount >>= 1; + } + attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID; + attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; + /* Switch to mounted_on_fileid if the server supports it */ + if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) + attrs[0] &= ~FATTR4_WORD0_FILEID; + else + attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); *p++ = cpu_to_be32(OP_READDIR); p = xdr_encode_hyper(p, readdir->cookie); p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); - *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */ + *p++ = cpu_to_be32(dircount); *p++ = cpu_to_be32(readdir->count); *p++ = cpu_to_be32(2); - /* Switch to mounted_on_fileid if the server supports it */ - if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) - attrs[0] &= ~FATTR4_WORD0_FILEID; - else - attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); hdr->nops++; @@ -1471,7 +1512,7 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) hdr->replen += decode_restorefh_maxsz; } -static int +static void encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr) { __be32 *p; @@ -1482,14 +1523,12 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun p = reserve_space(xdr, 2*4); *p++ = cpu_to_be32(1); *p = cpu_to_be32(FATTR4_WORD0_ACL); - if (arg->acl_len % 4) - return -EINVAL; + BUG_ON(arg->acl_len % 4); p = reserve_space(xdr, 4); *p = cpu_to_be32(arg->acl_len); xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); hdr->nops++; hdr->replen += decode_setacl_maxsz; - return 0; } static void @@ -1726,6 +1765,55 @@ static void encode_sequence(struct xdr_stream *xdr, #endif /* CONFIG_NFS_V4_1 */ } +#ifdef CONFIG_NFS_V4_1 +static void +encode_getdeviceinfo(struct xdr_stream *xdr, + const struct nfs4_getdeviceinfo_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE); + *p++ = cpu_to_be32(OP_GETDEVICEINFO); + p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, + NFS4_DEVICEID4_SIZE); + *p++ = cpu_to_be32(args->pdev->layout_type); + *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ + *p++ = cpu_to_be32(0); /* bitmap length 0 */ + hdr->nops++; + hdr->replen += decode_getdeviceinfo_maxsz; +} + +static void +encode_layoutget(struct xdr_stream *xdr, + const struct nfs4_layoutget_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_LAYOUTGET); + *p++ = cpu_to_be32(0); /* Signal layout available */ + *p++ = cpu_to_be32(args->type); + *p++ = cpu_to_be32(args->range.iomode); + p = xdr_encode_hyper(p, args->range.offset); + p = xdr_encode_hyper(p, args->range.length); + p = xdr_encode_hyper(p, args->minlength); + p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(args->maxcount); + + dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", + __func__, + args->type, + args->range.iomode, + (unsigned long)args->range.offset, + (unsigned long)args->range.length, + args->maxcount); + hdr->nops++; + hdr->replen += decode_layoutget_maxsz; +} +#endif /* CONFIG_NFS_V4_1 */ + /* * END OF "GENERIC" ENCODE ROUTINES. */ @@ -1742,393 +1830,362 @@ static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args) /* * Encode an ACCESS request */ -static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs4_accessargs *args) +static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfs4_accessargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_access(&xdr, args->access, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_access(xdr, args->access, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode LOOKUP request */ -static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_arg *args) +static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfs4_lookup_arg *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->dir_fh, &hdr); - encode_lookup(&xdr, args->name, &hdr); - encode_getfh(&xdr, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->dir_fh, &hdr); + encode_lookup(xdr, args->name, &hdr); + encode_getfh(xdr, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode LOOKUP_ROOT request */ -static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_root_arg *args) +static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs4_lookup_root_arg *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putrootfh(&xdr, &hdr); - encode_getfh(&xdr, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putrootfh(xdr, &hdr); + encode_getfh(xdr, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode REMOVE request */ -static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args) +static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfs_removeargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_remove(&xdr, &args->name, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_remove(xdr, &args->name, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode RENAME request */ -static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs4_rename_arg *args) +static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfs_renameargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->old_dir, &hdr); - encode_savefh(&xdr, &hdr); - encode_putfh(&xdr, args->new_dir, &hdr); - encode_rename(&xdr, args->old_name, args->new_name, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); - encode_restorefh(&xdr, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->old_dir, &hdr); + encode_savefh(xdr, &hdr); + encode_putfh(xdr, args->new_dir, &hdr); + encode_rename(xdr, args->old_name, args->new_name, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); + encode_restorefh(xdr, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode LINK request */ -static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_link_arg *args) +static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfs4_link_arg *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_savefh(&xdr, &hdr); - encode_putfh(&xdr, args->dir_fh, &hdr); - encode_link(&xdr, args->name, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); - encode_restorefh(&xdr, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_savefh(xdr, &hdr); + encode_putfh(xdr, args->dir_fh, &hdr); + encode_link(xdr, args->name, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); + encode_restorefh(xdr, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode CREATE request */ -static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args) +static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfs4_create_arg *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->dir_fh, &hdr); - encode_savefh(&xdr, &hdr); - encode_create(&xdr, args, &hdr); - encode_getfh(&xdr, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); - encode_restorefh(&xdr, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->dir_fh, &hdr); + encode_savefh(xdr, &hdr); + encode_create(xdr, args, &hdr); + encode_getfh(xdr, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); + encode_restorefh(xdr, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode SYMLINK request */ -static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args) +static void nfs4_xdr_enc_symlink(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfs4_create_arg *args) { - return nfs4_xdr_enc_create(req, p, args); + nfs4_xdr_enc_create(req, xdr, args); } /* * Encode GETATTR request */ -static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nfs4_getattr_arg *args) +static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfs4_getattr_arg *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode a CLOSE request */ -static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args) +static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_closeargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_close(&xdr, args, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_close(xdr, args, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode an OPEN request */ -static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args) +static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_openargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_savefh(&xdr, &hdr); - encode_open(&xdr, args, &hdr); - encode_getfh(&xdr, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); - encode_restorefh(&xdr, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_savefh(xdr, &hdr); + encode_open(xdr, args, &hdr); + encode_getfh(xdr, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); + encode_restorefh(xdr, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode an OPEN_CONFIRM request */ -static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_open_confirmargs *args) +static void nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_open_confirmargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 0, }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_open_confirm(&xdr, args, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_open_confirm(xdr, args, &hdr); encode_nops(&hdr); - return 0; } /* * Encode an OPEN request with no attributes. */ -static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args) +static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_openargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_open(&xdr, args, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_open(xdr, args, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode an OPEN_DOWNGRADE request */ -static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args) +static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_closeargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_open_downgrade(&xdr, args, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_open_downgrade(xdr, args, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode a LOCK request */ -static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_args *args) +static void nfs4_xdr_enc_lock(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_lock_args *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_lock(&xdr, args, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_lock(xdr, args, &hdr); encode_nops(&hdr); - return 0; } /* * Encode a LOCKT request */ -static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_args *args) +static void nfs4_xdr_enc_lockt(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_lockt_args *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_lockt(&xdr, args, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_lockt(xdr, args, &hdr); encode_nops(&hdr); - return 0; } /* * Encode a LOCKU request */ -static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_args *args) +static void nfs4_xdr_enc_locku(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_locku_args *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_locku(&xdr, args, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_locku(xdr, args, &hdr); encode_nops(&hdr); - return 0; } -static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args) +static void nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_release_lockowner_args *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = 0, }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_release_lockowner(&xdr, &args->lock_owner, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_release_lockowner(xdr, &args->lock_owner, &hdr); encode_nops(&hdr); - return 0; } /* * Encode a READLINK request */ -static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_readlink *args) +static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfs4_readlink *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_readlink(&xdr, args, req, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_readlink(xdr, args, req, &hdr); xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, args->pgbase, args->pglen); encode_nops(&hdr); - return 0; } /* * Encode a READDIR request */ -static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nfs4_readdir_arg *args) +static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfs4_readdir_arg *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_readdir(&xdr, args, req, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_readdir(xdr, args, req, &hdr); xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, args->pgbase, args->count); @@ -2136,413 +2193,414 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf __func__, hdr.replen << 2, args->pages, args->pgbase, args->count); encode_nops(&hdr); - return 0; } /* * Encode a READ request */ -static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) +static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_readargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_read(&xdr, args, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_read(xdr, args, &hdr); xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, args->pgbase, args->count); req->rq_rcv_buf.flags |= XDRBUF_READ; encode_nops(&hdr); - return 0; } /* * Encode an SETATTR request */ -static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args) +static void nfs4_xdr_enc_setattr(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_setattrargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_setattr(&xdr, args, args->server, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_setattr(xdr, args, args->server, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode a GETACL request */ -static int -nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p, - struct nfs_getaclargs *args) +static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_getaclargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; uint32_t replen; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1; - encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr); + encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, args->acl_pages, args->acl_pgbase, args->acl_len); encode_nops(&hdr); - return 0; } /* * Encode a WRITE request */ -static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) +static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_writeargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_write(&xdr, args, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_write(xdr, args, &hdr); req->rq_snd_buf.flags |= XDRBUF_WRITE; - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * a COMMIT request */ -static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) +static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_writeargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_commit(&xdr, args, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_commit(xdr, args, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * FSINFO request */ -static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsinfo_arg *args) +static void nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs4_fsinfo_arg *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_fsinfo(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_fsinfo(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * a PATHCONF request */ -static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct nfs4_pathconf_arg *args) +static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfs4_pathconf_arg *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0], + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_getattr_one(xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0], &hdr); encode_nops(&hdr); - return 0; } /* * a STATFS request */ -static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs4_statfs_arg *args) +static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfs4_statfs_arg *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0], + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_getattr_two(xdr, args->bitmask[0] & nfs4_statfs_bitmap[0], args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr); encode_nops(&hdr); - return 0; } /* * GETATTR_BITMAP request */ -static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, - struct nfs4_server_caps_arg *args) +static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_server_caps_arg *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fhandle, &hdr); - encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS| + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fhandle, &hdr); + encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS| FATTR4_WORD0_LINK_SUPPORT| FATTR4_WORD0_SYMLINK_SUPPORT| FATTR4_WORD0_ACLSUPPORT, &hdr); encode_nops(&hdr); - return 0; } /* * a RENEW request */ -static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp) +static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_client *clp) { - struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 0, }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_renew(&xdr, clp, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_renew(xdr, clp, &hdr); encode_nops(&hdr); - return 0; } /* * a SETCLIENTID request */ -static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid *sc) +static void nfs4_xdr_enc_setclientid(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_setclientid *sc) { - struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 0, }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_setclientid(&xdr, sc, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_setclientid(xdr, sc, &hdr); encode_nops(&hdr); - return 0; } /* * a SETCLIENTID_CONFIRM request */ -static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid_res *arg) +static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_setclientid_res *arg) { - struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 0, }; const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_setclientid_confirm(&xdr, arg, &hdr); - encode_putrootfh(&xdr, &hdr); - encode_fsinfo(&xdr, lease_bitmap, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_setclientid_confirm(xdr, arg, &hdr); + encode_putrootfh(xdr, &hdr); + encode_fsinfo(xdr, lease_bitmap, &hdr); encode_nops(&hdr); - return 0; } /* * DELEGRETURN request */ -static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struct nfs4_delegreturnargs *args) +static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, + struct xdr_stream *xdr, + const struct nfs4_delegreturnargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fhandle, &hdr); - encode_delegreturn(&xdr, args->stateid, &hdr); - encode_getfattr(&xdr, args->bitmask, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fhandle, &hdr); + encode_delegreturn(xdr, args->stateid, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; } /* * Encode FS_LOCATIONS request */ -static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations_arg *args) +static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_fs_locations_arg *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; uint32_t replen; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->dir_fh, &hdr); - encode_lookup(&xdr, args->name, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->dir_fh, &hdr); + encode_lookup(xdr, args->name, &hdr); replen = hdr.replen; /* get the attribute into args->page */ - encode_fs_locations(&xdr, args->bitmask, &hdr); + encode_fs_locations(xdr, args->bitmask, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page, 0, PAGE_SIZE); encode_nops(&hdr); - return 0; } #if defined(CONFIG_NFS_V4_1) /* * EXCHANGE_ID request */ -static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p, - struct nfs41_exchange_id_args *args) +static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_exchange_id_args *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = args->client->cl_mvops->minor_version, }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_exchange_id(&xdr, args, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_exchange_id(xdr, args, &hdr); encode_nops(&hdr); - return 0; } /* * a CREATE_SESSION request */ -static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p, - struct nfs41_create_session_args *args) +static void nfs4_xdr_enc_create_session(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_create_session_args *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = args->client->cl_mvops->minor_version, }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_create_session(&xdr, args, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_create_session(xdr, args, &hdr); encode_nops(&hdr); - return 0; } /* * a DESTROY_SESSION request */ -static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p, - struct nfs4_session *session) +static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_session *session) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = session->clp->cl_mvops->minor_version, }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_destroy_session(&xdr, session, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_destroy_session(xdr, session, &hdr); encode_nops(&hdr); - return 0; } /* * a SEQUENCE request */ -static int nfs4_xdr_enc_sequence(struct rpc_rqst *req, uint32_t *p, - struct nfs4_sequence_args *args) +static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs4_sequence_args *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(args), }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, args, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, args, &hdr); encode_nops(&hdr); - return 0; } /* * a GET_LEASE_TIME request */ -static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p, - struct nfs4_get_lease_time_args *args) +static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_get_lease_time_args *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->la_seq_args), }; const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->la_seq_args, &hdr); - encode_putrootfh(&xdr, &hdr); - encode_fsinfo(&xdr, lease_bitmap, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->la_seq_args, &hdr); + encode_putrootfh(xdr, &hdr); + encode_fsinfo(xdr, lease_bitmap, &hdr); encode_nops(&hdr); - return 0; } /* * a RECLAIM_COMPLETE request */ -static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p, - struct nfs41_reclaim_complete_args *args) +static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_reclaim_complete_args *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args) }; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_reclaim_complete(&xdr, args, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_reclaim_complete(xdr, args, &hdr); encode_nops(&hdr); - return 0; } +/* + * Encode GETDEVICEINFO request + */ +static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_getdeviceinfo_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_getdeviceinfo(xdr, args, &hdr); + + /* set up reply kvec. Subtract notification bitmap max size (2) + * so that notification bitmap is put in xdr_buf tail */ + xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2, + args->pdev->pages, args->pdev->pgbase, + args->pdev->pglen); + + encode_nops(&hdr); +} + +/* + * Encode LAYOUTGET request + */ +static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_layoutget_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, NFS_FH(args->inode), &hdr); + encode_layoutget(xdr, args, &hdr); + encode_nops(&hdr); +} #endif /* CONFIG_NFS_V4_1 */ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) @@ -2676,7 +2734,10 @@ out_overflow: static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) { if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) { - decode_attr_bitmap(xdr, bitmask); + int ret; + ret = decode_attr_bitmap(xdr, bitmask); + if (unlikely(ret < 0)) + return ret; bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; } else bitmask[0] = bitmask[1] = 0; @@ -2848,6 +2909,56 @@ out_overflow: return -EIO; } +static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap) +{ + __be32 *p; + + if (unlikely(bitmap[0] & (FATTR4_WORD0_RDATTR_ERROR - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_RDATTR_ERROR)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; + } + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fh *fh) +{ + __be32 *p; + int len; + + if (fh != NULL) + memset(fh, 0, sizeof(*fh)); + + if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEHANDLE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_FILEHANDLE)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + len = be32_to_cpup(p); + if (len > NFS4_FHSIZE) + return -EIO; + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + goto out_overflow; + if (fh != NULL) { + memcpy(fh->data, p, len); + fh->size = len; + } + bitmap[0] &= ~FATTR4_WORD0_FILEHANDLE; + } + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) { __be32 *p; @@ -3521,6 +3632,24 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s return status; } +static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap, + struct timespec *time) +{ + int status = 0; + + time->tv_sec = 0; + time->tv_nsec = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_DELTA - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_TIME_DELTA)) { + status = decode_attr_time(xdr, time); + bitmap[1] &= ~FATTR4_WORD1_TIME_DELTA; + } + dprintk("%s: time_delta=%ld %ld\n", __func__, (long)time->tv_sec, + (long)time->tv_nsec); + return status; +} + static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) { int status = 0; @@ -3744,29 +3873,14 @@ xdr_error: return status; } -static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, +static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, + struct nfs_fattr *fattr, struct nfs_fh *fh, const struct nfs_server *server, int may_sleep) { - __be32 *savep; - uint32_t attrlen, - bitmap[2] = {0}, - type; int status; umode_t fmode = 0; uint64_t fileid; - - status = decode_op_hdr(xdr, OP_GETATTR); - if (status < 0) - goto xdr_error; - - status = decode_attr_bitmap(xdr, bitmap); - if (status < 0) - goto xdr_error; - - status = decode_attr_length(xdr, &attrlen, &savep); - if (status < 0) - goto xdr_error; - + uint32_t type; status = decode_attr_type(xdr, bitmap, &type); if (status < 0) @@ -3792,6 +3906,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, goto xdr_error; fattr->valid |= status; + status = decode_attr_error(xdr, bitmap); + if (status < 0) + goto xdr_error; + + status = decode_attr_filehandle(xdr, bitmap, fh); + if (status < 0) + goto xdr_error; + status = decode_attr_fileid(xdr, bitmap, &fattr->fileid); if (status < 0) goto xdr_error; @@ -3862,12 +3984,101 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, fattr->valid |= status; } +xdr_error: + dprintk("%s: xdr returned %d\n", __func__, -status); + return status; +} + +static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, + struct nfs_fh *fh, const struct nfs_server *server, int may_sleep) +{ + __be32 *savep; + uint32_t attrlen, + bitmap[2] = {0}; + int status; + + status = decode_op_hdr(xdr, OP_GETATTR); + if (status < 0) + goto xdr_error; + + status = decode_attr_bitmap(xdr, bitmap); + if (status < 0) + goto xdr_error; + + status = decode_attr_length(xdr, &attrlen, &savep); + if (status < 0) + goto xdr_error; + + status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server, may_sleep); + if (status < 0) + goto xdr_error; + status = verify_attr_len(xdr, savep, attrlen); xdr_error: dprintk("%s: xdr returned %d\n", __func__, -status); return status; } +static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, + const struct nfs_server *server, int may_sleep) +{ + return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep); +} + +/* + * Decode potentially multiple layout types. Currently we only support + * one layout driver per file system. + */ +static int decode_first_pnfs_layout_type(struct xdr_stream *xdr, + uint32_t *layouttype) +{ + uint32_t *p; + int num; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + num = be32_to_cpup(p); + + /* pNFS is not supported by the underlying file system */ + if (num == 0) { + *layouttype = 0; + return 0; + } + if (num > 1) + printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers " + "per filesystem not supported\n", __func__); + + /* Decode and set first layout type, move xdr->p past unused types */ + p = xdr_inline_decode(xdr, num * 4); + if (unlikely(!p)) + goto out_overflow; + *layouttype = be32_to_cpup(p); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +/* + * The type of file system exported. + * Note we must ensure that layouttype is set in any non-error case. + */ +static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, + uint32_t *layouttype) +{ + int status = 0; + + dprintk("%s: bitmap is %x\n", __func__, bitmap[1]); + if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U))) + return -EIO; + if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) { + status = decode_first_pnfs_layout_type(xdr, layouttype); + bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES; + } else + *layouttype = 0; + return status; +} static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) { @@ -3894,6 +4105,12 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) goto xdr_error; fsinfo->wtpref = fsinfo->wtmax; + status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); + if (status != 0) + goto xdr_error; + status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); + if (status != 0) + goto xdr_error; status = verify_attr_len(xdr, savep, attrlen); xdr_error: @@ -3950,13 +4167,13 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) __be32 *p; uint32_t namelen, type; - p = xdr_inline_decode(xdr, 32); + p = xdr_inline_decode(xdr, 32); /* read 32 bytes */ if (unlikely(!p)) goto out_overflow; - p = xdr_decode_hyper(p, &offset); + p = xdr_decode_hyper(p, &offset); /* read 2 8-byte long words */ p = xdr_decode_hyper(p, &length); - type = be32_to_cpup(p++); - if (fl != NULL) { + type = be32_to_cpup(p++); /* 4 byte read */ + if (fl != NULL) { /* manipulate file lock */ fl->fl_start = (loff_t)offset; fl->fl_end = fl->fl_start + (loff_t)length - 1; if (length == ~(uint64_t)0) @@ -3966,9 +4183,9 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) fl->fl_type = F_RDLCK; fl->fl_pid = 0; } - p = xdr_decode_hyper(p, &clientid); - namelen = be32_to_cpup(p); - p = xdr_inline_decode(xdr, namelen); + p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */ + namelen = be32_to_cpup(p); /* read 4 bytes */ /* have read all 32 bytes now */ + p = xdr_inline_decode(xdr, namelen); /* variable size field */ if (likely(p)) return -NFS4ERR_DENIED; out_overflow: @@ -4180,7 +4397,7 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_ goto out_overflow; eof = be32_to_cpup(p++); count = be32_to_cpup(p); - hdrlen = (u8 *) p - (u8 *) iov->iov_base; + hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base; recvd = req->rq_rcv_buf.len - hdrlen; if (count > recvd) { dprintk("NFS: server cheating in read reply: " @@ -4200,12 +4417,9 @@ out_overflow: static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) { struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct page *page = *rcvbuf->pages; struct kvec *iov = rcvbuf->head; size_t hdrlen; u32 recvd, pglen = rcvbuf->page_len; - __be32 *end, *entry, *p, *kaddr; - unsigned int nr = 0; int status; status = decode_op_hdr(xdr, OP_READDIR); @@ -4225,71 +4439,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n pglen = recvd; xdr_read_pages(xdr, pglen); - BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); - kaddr = p = kmap_atomic(page, KM_USER0); - end = p + ((pglen + readdir->pgbase) >> 2); - entry = p; - - /* Make sure the packet actually has a value_follows and EOF entry */ - if ((entry + 1) > end) - goto short_pkt; - - for (; *p++; nr++) { - u32 len, attrlen, xlen; - if (end - p < 3) - goto short_pkt; - dprintk("cookie = %Lu, ", *((unsigned long long *)p)); - p += 2; /* cookie */ - len = ntohl(*p++); /* filename length */ - if (len > NFS4_MAXNAMLEN) { - dprintk("NFS: giant filename in readdir (len 0x%x)\n", - len); - goto err_unmap; - } - xlen = XDR_QUADLEN(len); - if (end - p < xlen + 1) - goto short_pkt; - dprintk("filename = %*s\n", len, (char *)p); - p += xlen; - len = ntohl(*p++); /* bitmap length */ - if (end - p < len + 1) - goto short_pkt; - p += len; - attrlen = XDR_QUADLEN(ntohl(*p++)); - if (end - p < attrlen + 2) - goto short_pkt; - p += attrlen; /* attributes */ - entry = p; - } - /* - * Apparently some server sends responses that are a valid size, but - * contain no entries, and have value_follows==0 and EOF==0. For - * those, just set the EOF marker. - */ - if (!nr && entry[1] == 0) { - dprintk("NFS: readdir reply truncated!\n"); - entry[1] = 1; - } -out: - kunmap_atomic(kaddr, KM_USER0); - return 0; -short_pkt: - /* - * When we get a short packet there are 2 possibilities. We can - * return an error, or fix up the response to look like a valid - * response and return what we have so far. If there are no - * entries and the packet was short, then return -EIO. If there - * are valid entries in the response, return them and pretend that - * the call was successful, but incomplete. The caller can retry the - * readdir starting at the last cookie. - */ - dprintk("%s: short packet at entry %d\n", __func__, nr); - entry[0] = entry[1] = 0; - if (nr) - goto out; -err_unmap: - kunmap_atomic(kaddr, KM_USER0); - return -errno_NFSERR_IO; + + return pglen; } static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) @@ -4299,7 +4450,6 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) size_t hdrlen; u32 len, recvd; __be32 *p; - char *kaddr; int status; status = decode_op_hdr(xdr, OP_READLINK); @@ -4330,9 +4480,7 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) * and and null-terminate the text (the VFS expects * null-termination). */ - kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); - kaddr[len+rcvbuf->page_base] = '\0'; - kunmap_atomic(kaddr, KM_USER0); + xdr_terminate_string(rcvbuf, len); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -4668,7 +4816,6 @@ static int decode_sequence(struct xdr_stream *xdr, struct rpc_rqst *rqstp) { #if defined(CONFIG_NFS_V4_1) - struct nfs4_slot *slot; struct nfs4_sessionid id; u32 dummy; int status; @@ -4700,15 +4847,14 @@ static int decode_sequence(struct xdr_stream *xdr, goto out_overflow; /* seqid */ - slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid]; dummy = be32_to_cpup(p++); - if (dummy != slot->seq_nr) { + if (dummy != res->sr_slot->seq_nr) { dprintk("%s Invalid sequence number\n", __func__); goto out_err; } /* slot id */ dummy = be32_to_cpup(p++); - if (dummy != res->sr_slotid) { + if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) { dprintk("%s Invalid slot id\n", __func__); goto out_err; } @@ -4731,6 +4877,134 @@ out_overflow: #endif /* CONFIG_NFS_V4_1 */ } +#if defined(CONFIG_NFS_V4_1) + +static int decode_getdeviceinfo(struct xdr_stream *xdr, + struct pnfs_device *pdev) +{ + __be32 *p; + uint32_t len, type; + int status; + + status = decode_op_hdr(xdr, OP_GETDEVICEINFO); + if (status) { + if (status == -ETOOSMALL) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + pdev->mincount = be32_to_cpup(p); + dprintk("%s: Min count too small. mincnt = %u\n", + __func__, pdev->mincount); + } + return status; + } + + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + type = be32_to_cpup(p++); + if (type != pdev->layout_type) { + dprintk("%s: layout mismatch req: %u pdev: %u\n", + __func__, pdev->layout_type, type); + return -EINVAL; + } + /* + * Get the length of the opaque device_addr4. xdr_read_pages places + * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages) + * and places the remaining xdr data in xdr_buf->tail + */ + pdev->mincount = be32_to_cpup(p); + xdr_read_pages(xdr, pdev->mincount); /* include space for the length */ + + /* Parse notification bitmap, verifying that it is zero. */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + len = be32_to_cpup(p); + if (len) { + uint32_t i; + + p = xdr_inline_decode(xdr, 4 * len); + if (unlikely(!p)) + goto out_overflow; + for (i = 0; i < len; i++, p++) { + if (be32_to_cpup(p)) { + dprintk("%s: notifications not supported\n", + __func__); + return -EIO; + } + } + } + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, + struct nfs4_layoutget_res *res) +{ + __be32 *p; + int status; + u32 layout_count; + + status = decode_op_hdr(xdr, OP_LAYOUTGET); + if (status) + return status; + p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE); + if (unlikely(!p)) + goto out_overflow; + res->return_on_close = be32_to_cpup(p++); + p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE); + layout_count = be32_to_cpup(p); + if (!layout_count) { + dprintk("%s: server responded with empty layout array\n", + __func__); + return -EINVAL; + } + + p = xdr_inline_decode(xdr, 24); + if (unlikely(!p)) + goto out_overflow; + p = xdr_decode_hyper(p, &res->range.offset); + p = xdr_decode_hyper(p, &res->range.length); + res->range.iomode = be32_to_cpup(p++); + res->type = be32_to_cpup(p++); + + status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p); + if (unlikely(status)) + return status; + + dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", + __func__, + (unsigned long)res->range.offset, + (unsigned long)res->range.length, + res->range.iomode, + res->type, + res->layout.len); + + /* nfs4_proc_layoutget allocated a single page */ + if (res->layout.len > PAGE_SIZE) + return -ENOMEM; + memcpy(res->layout.buf, p, res->layout.len); + + if (layout_count > 1) { + /* We only handle a length one array at the moment. Any + * further entries are just ignored. Note that this means + * the client may see a response that is less than the + * minimum it requested. + */ + dprintk("%s: server responded with %d layouts, dropping tail\n", + __func__, layout_count); + } + + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} +#endif /* CONFIG_NFS_V4_1 */ + /* * END OF "GENERIC" DECODE ROUTINES. */ @@ -4738,26 +5012,26 @@ out_overflow: /* * Decode OPEN_DOWNGRADE response */ -static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res) +static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs_closeres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_open_downgrade(&xdr, res); + status = decode_open_downgrade(xdr, res); if (status != 0) goto out; - decode_getfattr(&xdr, res->fattr, res->server, + decode_getfattr(xdr, res->fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -4766,26 +5040,25 @@ out: /* * Decode ACCESS response */ -static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_accessres *res) +static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs4_accessres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status != 0) goto out; - status = decode_access(&xdr, res); + status = decode_access(xdr, res); if (status != 0) goto out; - decode_getfattr(&xdr, res->fattr, res->server, + decode_getfattr(xdr, res->fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -4794,26 +5067,28 @@ out: /* * Decode LOOKUP response */ -static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res) +static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs4_lookup_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - if ((status = decode_putfh(&xdr)) != 0) + status = decode_putfh(xdr); + if (status) goto out; - if ((status = decode_lookup(&xdr)) != 0) + status = decode_lookup(xdr); + if (status) goto out; - if ((status = decode_getfh(&xdr, res->fh)) != 0) + status = decode_getfh(xdr, res->fh); + if (status) goto out; - status = decode_getfattr(&xdr, res->fattr, res->server + status = decode_getfattr(xdr, res->fattr, res->server ,!RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -4822,23 +5097,25 @@ out: /* * Decode LOOKUP_ROOT response */ -static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res) +static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_lookup_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - if ((status = decode_putrootfh(&xdr)) != 0) + status = decode_putrootfh(xdr); + if (status) goto out; - if ((status = decode_getfh(&xdr, res->fh)) == 0) - status = decode_getfattr(&xdr, res->fattr, res->server, + status = decode_getfh(xdr, res->fh); + if (status == 0) + status = decode_getfattr(xdr, res->fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -4847,24 +5124,25 @@ out: /* * Decode REMOVE response */ -static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_removeres *res) +static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs_removeres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - if ((status = decode_putfh(&xdr)) != 0) + status = decode_putfh(xdr); + if (status) goto out; - if ((status = decode_remove(&xdr, &res->cinfo)) != 0) + status = decode_remove(xdr, &res->cinfo); + if (status) goto out; - decode_getfattr(&xdr, res->dir_attr, res->server, + decode_getfattr(xdr, res->dir_attr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -4873,34 +5151,38 @@ out: /* * Decode RENAME response */ -static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_rename_res *res) +static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs_renameres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - if ((status = decode_putfh(&xdr)) != 0) + status = decode_putfh(xdr); + if (status) goto out; - if ((status = decode_savefh(&xdr)) != 0) + status = decode_savefh(xdr); + if (status) goto out; - if ((status = decode_putfh(&xdr)) != 0) + status = decode_putfh(xdr); + if (status) goto out; - if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0) + status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo); + if (status) goto out; /* Current FH is target directory */ - if (decode_getfattr(&xdr, res->new_fattr, res->server, + if (decode_getfattr(xdr, res->new_fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)) != 0) goto out; - if ((status = decode_restorefh(&xdr)) != 0) + status = decode_restorefh(xdr); + if (status) goto out; - decode_getfattr(&xdr, res->old_fattr, res->server, + decode_getfattr(xdr, res->old_fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -4909,37 +5191,41 @@ out: /* * Decode LINK response */ -static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link_res *res) +static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs4_link_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - if ((status = decode_putfh(&xdr)) != 0) + status = decode_putfh(xdr); + if (status) goto out; - if ((status = decode_savefh(&xdr)) != 0) + status = decode_savefh(xdr); + if (status) goto out; - if ((status = decode_putfh(&xdr)) != 0) + status = decode_putfh(xdr); + if (status) goto out; - if ((status = decode_link(&xdr, &res->cinfo)) != 0) + status = decode_link(xdr, &res->cinfo); + if (status) goto out; /* * Note order: OP_LINK leaves the directory as the current * filehandle. */ - if (decode_getfattr(&xdr, res->dir_attr, res->server, + if (decode_getfattr(xdr, res->dir_attr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)) != 0) goto out; - if ((status = decode_restorefh(&xdr)) != 0) + status = decode_restorefh(xdr); + if (status) goto out; - decode_getfattr(&xdr, res->fattr, res->server, + decode_getfattr(xdr, res->fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -4948,33 +5234,37 @@ out: /* * Decode CREATE response */ -static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res) +static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs4_create_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - if ((status = decode_putfh(&xdr)) != 0) + status = decode_putfh(xdr); + if (status) goto out; - if ((status = decode_savefh(&xdr)) != 0) + status = decode_savefh(xdr); + if (status) goto out; - if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0) + status = decode_create(xdr, &res->dir_cinfo); + if (status) goto out; - if ((status = decode_getfh(&xdr, res->fh)) != 0) + status = decode_getfh(xdr, res->fh); + if (status) goto out; - if (decode_getfattr(&xdr, res->fattr, res->server, + if (decode_getfattr(xdr, res->fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)) != 0) goto out; - if ((status = decode_restorefh(&xdr)) != 0) + status = decode_restorefh(xdr); + if (status) goto out; - decode_getfattr(&xdr, res->dir_fattr, res->server, + decode_getfattr(xdr, res->dir_fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -4983,31 +5273,31 @@ out: /* * Decode SYMLINK response */ -static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res) +static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs4_create_res *res) { - return nfs4_xdr_dec_create(rqstp, p, res); + return nfs4_xdr_dec_create(rqstp, xdr, res); } /* * Decode GETATTR response */ -static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_getattr_res *res) +static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs4_getattr_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_getfattr(&xdr, res->fattr, res->server, + status = decode_getfattr(xdr, res->fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -5016,46 +5306,40 @@ out: /* * Encode an SETACL request */ -static int -nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args) +static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr, + struct nfs_setaclargs *args) { - struct xdr_stream xdr; struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - int status; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, req, &hdr); - encode_sequence(&xdr, &args->seq_args, &hdr); - encode_putfh(&xdr, args->fh, &hdr); - status = encode_setacl(&xdr, args, &hdr); + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_setacl(xdr, args, &hdr); encode_nops(&hdr); - return status; } /* * Decode SETACL response */ static int -nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, struct nfs_setaclres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_setattr(&xdr); + status = decode_setattr(xdr); out: return status; } @@ -5064,24 +5348,22 @@ out: * Decode GETACL response */ static int -nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, struct nfs_getaclres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_getacl(&xdr, rqstp, &res->acl_len); + status = decode_getacl(xdr, rqstp, &res->acl_len); out: return status; @@ -5090,23 +5372,22 @@ out: /* * Decode CLOSE response */ -static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res) +static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs_closeres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_close(&xdr, res); + status = decode_close(xdr, res); if (status != 0) goto out; /* @@ -5115,7 +5396,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos * an ESTALE error. Shouldn't be a problem, * though, since fattr->valid will remain unset. */ - decode_getfattr(&xdr, res->fattr, res->server, + decode_getfattr(xdr, res->fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -5124,36 +5405,35 @@ out: /* * Decode OPEN response */ -static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res) +static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs_openres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_savefh(&xdr); + status = decode_savefh(xdr); if (status) goto out; - status = decode_open(&xdr, res); + status = decode_open(xdr, res); if (status) goto out; - if (decode_getfh(&xdr, &res->fh) != 0) + if (decode_getfh(xdr, &res->fh) != 0) goto out; - if (decode_getfattr(&xdr, res->f_attr, res->server, + if (decode_getfattr(xdr, res->f_attr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)) != 0) goto out; - if (decode_restorefh(&xdr) != 0) + if (decode_restorefh(xdr) != 0) goto out; - decode_getfattr(&xdr, res->dir_attr, res->server, + decode_getfattr(xdr, res->dir_attr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -5162,20 +5442,20 @@ out: /* * Decode OPEN_CONFIRM response */ -static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res) +static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs_open_confirmres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_open_confirm(&xdr, res); + status = decode_open_confirm(xdr, res); out: return status; } @@ -5183,26 +5463,26 @@ out: /* * Decode OPEN response */ -static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res) +static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs_openres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_open(&xdr, res); + status = decode_open(xdr, res); if (status) goto out; - decode_getfattr(&xdr, res->f_attr, res->server, + decode_getfattr(xdr, res->f_attr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -5211,26 +5491,26 @@ out: /* * Decode SETATTR response */ -static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res) +static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs_setattrres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_setattr(&xdr); + status = decode_setattr(xdr); if (status) goto out; - decode_getfattr(&xdr, res->fattr, res->server, + decode_getfattr(xdr, res->fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -5239,23 +5519,22 @@ out: /* * Decode LOCK response */ -static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock_res *res) +static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs_lock_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_lock(&xdr, res); + status = decode_lock(xdr, res); out: return status; } @@ -5263,23 +5542,22 @@ out: /* * Decode LOCKT response */ -static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lockt_res *res) +static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs_lockt_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_lockt(&xdr, res); + status = decode_lockt(xdr, res); out: return status; } @@ -5287,61 +5565,58 @@ out: /* * Decode LOCKU response */ -static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, __be32 *p, struct nfs_locku_res *res) +static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs_locku_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_locku(&xdr, res); + status = decode_locku(xdr, res); out: return status; } -static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy) +static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, void *dummy) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (!status) - status = decode_release_lockowner(&xdr); + status = decode_release_lockowner(xdr); return status; } /* * Decode READLINK response */ -static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, +static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, struct nfs4_readlink_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_readlink(&xdr, rqstp); + status = decode_readlink(xdr, rqstp); out: return status; } @@ -5349,23 +5624,22 @@ out: /* * Decode READDIR response */ -static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_readdir_res *res) +static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs4_readdir_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_readdir(&xdr, rqstp, res); + status = decode_readdir(xdr, rqstp, res); out: return status; } @@ -5373,23 +5647,22 @@ out: /* * Decode Read response */ -static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, __be32 *p, struct nfs_readres *res) +static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs_readres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_read(&xdr, rqstp, res); + status = decode_read(xdr, rqstp, res); if (!status) status = res->count; out: @@ -5399,26 +5672,25 @@ out: /* * Decode WRITE response */ -static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res) +static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs_writeres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_write(&xdr, res); + status = decode_write(xdr, res); if (status) goto out; - decode_getfattr(&xdr, res->fattr, res->server, + decode_getfattr(xdr, res->fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); if (!status) status = res->count; @@ -5429,26 +5701,25 @@ out: /* * Decode COMMIT response */ -static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res) +static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfs_writeres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status) goto out; - status = decode_commit(&xdr, res); + status = decode_commit(xdr, res); if (status) goto out; - decode_getfattr(&xdr, res->fattr, res->server, + decode_getfattr(xdr, res->fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -5457,85 +5728,80 @@ out: /* * Decode FSINFO response */ -static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, +static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr, struct nfs4_fsinfo_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (!status) - status = decode_sequence(&xdr, &res->seq_res, req); + status = decode_sequence(xdr, &res->seq_res, req); if (!status) - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (!status) - status = decode_fsinfo(&xdr, res->fsinfo); + status = decode_fsinfo(xdr, res->fsinfo); return status; } /* * Decode PATHCONF response */ -static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, +static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr, struct nfs4_pathconf_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (!status) - status = decode_sequence(&xdr, &res->seq_res, req); + status = decode_sequence(xdr, &res->seq_res, req); if (!status) - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (!status) - status = decode_pathconf(&xdr, res->pathconf); + status = decode_pathconf(xdr, res->pathconf); return status; } /* * Decode STATFS response */ -static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, +static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, struct xdr_stream *xdr, struct nfs4_statfs_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (!status) - status = decode_sequence(&xdr, &res->seq_res, req); + status = decode_sequence(xdr, &res->seq_res, req); if (!status) - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (!status) - status = decode_statfs(&xdr, res->fsstat); + status = decode_statfs(xdr, res->fsstat); return status; } /* * Decode GETATTR_BITMAP response */ -static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4_server_caps_res *res) +static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_server_caps_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, req); + status = decode_sequence(xdr, &res->seq_res, req); if (status) goto out; - if ((status = decode_putfh(&xdr)) != 0) + status = decode_putfh(xdr); + if (status) goto out; - status = decode_server_caps(&xdr, res); + status = decode_server_caps(xdr, res); out: return status; } @@ -5543,79 +5809,77 @@ out: /* * Decode RENEW response */ -static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy) +static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + void *__unused) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (!status) - status = decode_renew(&xdr); + status = decode_renew(xdr); return status; } /* * Decode SETCLIENTID response */ -static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p, - struct nfs4_setclientid_res *res) +static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_setclientid_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (!status) - status = decode_setclientid(&xdr, res); + status = decode_setclientid(xdr, res); return status; } /* * Decode SETCLIENTID_CONFIRM response */ -static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo) +static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs_fsinfo *fsinfo) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (!status) - status = decode_setclientid_confirm(&xdr); + status = decode_setclientid_confirm(xdr); if (!status) - status = decode_putrootfh(&xdr); + status = decode_putrootfh(xdr); if (!status) - status = decode_fsinfo(&xdr, fsinfo); + status = decode_fsinfo(xdr, fsinfo); return status; } /* * Decode DELEGRETURN response */ -static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_delegreturnres *res) +static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_delegreturnres *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (status) goto out; - status = decode_putfh(&xdr); + status = decode_putfh(xdr); if (status != 0) goto out; - status = decode_delegreturn(&xdr); + status = decode_delegreturn(xdr); if (status != 0) goto out; - decode_getfattr(&xdr, res->fattr, res->server, + decode_getfattr(xdr, res->fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; @@ -5624,26 +5888,27 @@ out: /* * Decode FS_LOCATIONS response */ -static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, +static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, + struct xdr_stream *xdr, struct nfs4_fs_locations_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; - status = decode_sequence(&xdr, &res->seq_res, req); + status = decode_sequence(xdr, &res->seq_res, req); if (status) goto out; - if ((status = decode_putfh(&xdr)) != 0) + status = decode_putfh(xdr); + if (status) goto out; - if ((status = decode_lookup(&xdr)) != 0) + status = decode_lookup(xdr); + if (status) goto out; - xdr_enter_page(&xdr, PAGE_SIZE); - status = decode_getfattr(&xdr, &res->fs_locations->fattr, + xdr_enter_page(xdr, PAGE_SIZE); + status = decode_getfattr(xdr, &res->fs_locations->fattr, res->fs_locations->server, !RPC_IS_ASYNC(req->rq_task)); out: @@ -5654,129 +5919,194 @@ out: /* * Decode EXCHANGE_ID response */ -static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p, +static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, void *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (!status) - status = decode_exchange_id(&xdr, res); + status = decode_exchange_id(xdr, res); return status; } /* * Decode CREATE_SESSION response */ -static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p, +static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, struct nfs41_create_session_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (!status) - status = decode_create_session(&xdr, res); + status = decode_create_session(xdr, res); return status; } /* * Decode DESTROY_SESSION response */ -static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p, - void *dummy) +static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (!status) - status = decode_destroy_session(&xdr, dummy); + status = decode_destroy_session(xdr, res); return status; } /* * Decode SEQUENCE response */ -static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p, +static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, struct nfs4_sequence_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (!status) - status = decode_sequence(&xdr, res, rqstp); + status = decode_sequence(xdr, res, rqstp); return status; } /* * Decode GET_LEASE_TIME response */ -static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p, +static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, struct nfs4_get_lease_time_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (!status) - status = decode_sequence(&xdr, &res->lr_seq_res, rqstp); + status = decode_sequence(xdr, &res->lr_seq_res, rqstp); if (!status) - status = decode_putrootfh(&xdr); + status = decode_putrootfh(xdr); if (!status) - status = decode_fsinfo(&xdr, res->lr_fsinfo); + status = decode_fsinfo(xdr, res->lr_fsinfo); return status; } /* * Decode RECLAIM_COMPLETE response */ -static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p, +static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, struct nfs41_reclaim_complete_res *res) { - struct xdr_stream xdr; struct compound_hdr hdr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); + status = decode_compound_hdr(xdr, &hdr); if (!status) - status = decode_sequence(&xdr, &res->seq_res, rqstp); + status = decode_sequence(xdr, &res->seq_res, rqstp); if (!status) - status = decode_reclaim_complete(&xdr, (void *)NULL); + status = decode_reclaim_complete(xdr, (void *)NULL); + return status; +} + +/* + * Decode GETDEVINFO response + */ +static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_getdeviceinfo_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status != 0) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status != 0) + goto out; + status = decode_getdeviceinfo(xdr, res->pdev); +out: + return status; +} + +/* + * Decode LAYOUTGET response + */ +static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_layoutget_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_layoutget(xdr, rqstp, res); +out: return status; } #endif /* CONFIG_NFS_V4_1 */ -__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) +/** + * nfs4_decode_dirent - Decode a single NFSv4 directory entry stored in + * the local page cache. + * @xdr: XDR stream where entry resides + * @entry: buffer to fill in with entry data + * @plus: boolean indicating whether this should be a readdirplus entry + * + * Returns zero if successful, otherwise a negative errno value is + * returned. + * + * This function is not invoked during READDIR reply decoding, but + * rather whenever an application invokes the getdents(2) system call + * on a directory already in our cache. + */ +int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, + int plus) { uint32_t bitmap[2] = {0}; uint32_t len; - - if (!*p++) { - if (!*p) - return ERR_PTR(-EAGAIN); + __be32 *p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + if (*p == xdr_zero) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + if (*p == xdr_zero) + return -EAGAIN; entry->eof = 1; - return ERR_PTR(-EBADCOOKIE); + return -EBADCOOKIE; } + p = xdr_inline_decode(xdr, 12); + if (unlikely(!p)) + goto out_overflow; entry->prev_cookie = entry->cookie; p = xdr_decode_hyper(p, &entry->cookie); - entry->len = ntohl(*p++); + entry->len = be32_to_cpup(p); + + p = xdr_inline_decode(xdr, entry->len); + if (unlikely(!p)) + goto out_overflow; entry->name = (const char *) p; - p += XDR_QUADLEN(entry->len); /* * In case the server doesn't return an inode number, @@ -5784,32 +6114,29 @@ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) * since glibc seems to choke on it...) */ entry->ino = 1; + entry->fattr->valid = 0; - len = ntohl(*p++); /* bitmap length */ - if (len-- > 0) { - bitmap[0] = ntohl(*p++); - if (len-- > 0) { - bitmap[1] = ntohl(*p++); - p += len; - } - } - len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ - if (len > 0) { - if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) { - bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; - /* Ignore the return value of rdattr_error for now */ - p++; - len--; - } - if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) - xdr_decode_hyper(p, &entry->ino); - else if (bitmap[0] == FATTR4_WORD0_FILEID) - xdr_decode_hyper(p, &entry->ino); - p += len; - } + if (decode_attr_bitmap(xdr, bitmap) < 0) + goto out_overflow; - entry->eof = !p[0] && p[1]; - return p; + if (decode_attr_length(xdr, &len, &p) < 0) + goto out_overflow; + + if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, + entry->server, 1) < 0) + goto out_overflow; + if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) + entry->ino = entry->fattr->fileid; + + entry->d_type = DT_UNKNOWN; + if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE) + entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); + + return 0; + +out_overflow: + print_overflow_msg(__func__, xdr); + return -EAGAIN; } /* @@ -5885,8 +6212,8 @@ nfs4_stat_to_errno(int stat) #define PROC(proc, argtype, restype) \ [NFSPROC4_CLNT_##proc] = { \ .p_proc = NFSPROC4_COMPOUND, \ - .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ - .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ + .p_encode = (kxdreproc_t)nfs4_xdr_##argtype, \ + .p_decode = (kxdrdproc_t)nfs4_xdr_##restype, \ .p_arglen = NFS4_##argtype##_sz, \ .p_replen = NFS4_##restype##_sz, \ .p_statidx = NFSPROC4_CLNT_##proc, \ @@ -5894,48 +6221,50 @@ nfs4_stat_to_errno(int stat) } struct rpc_procinfo nfs4_procedures[] = { - PROC(READ, enc_read, dec_read), - PROC(WRITE, enc_write, dec_write), - PROC(COMMIT, enc_commit, dec_commit), - PROC(OPEN, enc_open, dec_open), - PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), - PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr), - PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade), - PROC(CLOSE, enc_close, dec_close), - PROC(SETATTR, enc_setattr, dec_setattr), - PROC(FSINFO, enc_fsinfo, dec_fsinfo), - PROC(RENEW, enc_renew, dec_renew), - PROC(SETCLIENTID, enc_setclientid, dec_setclientid), - PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm), - PROC(LOCK, enc_lock, dec_lock), - PROC(LOCKT, enc_lockt, dec_lockt), - PROC(LOCKU, enc_locku, dec_locku), - PROC(ACCESS, enc_access, dec_access), - PROC(GETATTR, enc_getattr, dec_getattr), - PROC(LOOKUP, enc_lookup, dec_lookup), - PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root), - PROC(REMOVE, enc_remove, dec_remove), - PROC(RENAME, enc_rename, dec_rename), - PROC(LINK, enc_link, dec_link), - PROC(SYMLINK, enc_symlink, dec_symlink), - PROC(CREATE, enc_create, dec_create), - PROC(PATHCONF, enc_pathconf, dec_pathconf), - PROC(STATFS, enc_statfs, dec_statfs), - PROC(READLINK, enc_readlink, dec_readlink), - PROC(READDIR, enc_readdir, dec_readdir), - PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), - PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), - PROC(GETACL, enc_getacl, dec_getacl), - PROC(SETACL, enc_setacl, dec_setacl), - PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), - PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), + PROC(READ, enc_read, dec_read), + PROC(WRITE, enc_write, dec_write), + PROC(COMMIT, enc_commit, dec_commit), + PROC(OPEN, enc_open, dec_open), + PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), + PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr), + PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade), + PROC(CLOSE, enc_close, dec_close), + PROC(SETATTR, enc_setattr, dec_setattr), + PROC(FSINFO, enc_fsinfo, dec_fsinfo), + PROC(RENEW, enc_renew, dec_renew), + PROC(SETCLIENTID, enc_setclientid, dec_setclientid), + PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm), + PROC(LOCK, enc_lock, dec_lock), + PROC(LOCKT, enc_lockt, dec_lockt), + PROC(LOCKU, enc_locku, dec_locku), + PROC(ACCESS, enc_access, dec_access), + PROC(GETATTR, enc_getattr, dec_getattr), + PROC(LOOKUP, enc_lookup, dec_lookup), + PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root), + PROC(REMOVE, enc_remove, dec_remove), + PROC(RENAME, enc_rename, dec_rename), + PROC(LINK, enc_link, dec_link), + PROC(SYMLINK, enc_symlink, dec_symlink), + PROC(CREATE, enc_create, dec_create), + PROC(PATHCONF, enc_pathconf, dec_pathconf), + PROC(STATFS, enc_statfs, dec_statfs), + PROC(READLINK, enc_readlink, dec_readlink), + PROC(READDIR, enc_readdir, dec_readdir), + PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), + PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), + PROC(GETACL, enc_getacl, dec_getacl), + PROC(SETACL, enc_setacl, dec_setacl), + PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), + PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), #if defined(CONFIG_NFS_V4_1) - PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), - PROC(CREATE_SESSION, enc_create_session, dec_create_session), - PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), - PROC(SEQUENCE, enc_sequence, dec_sequence), - PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), - PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), + PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), + PROC(CREATE_SESSION, enc_create_session, dec_create_session), + PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), + PROC(SEQUENCE, enc_sequence, dec_sequence), + PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), + PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), + PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), + PROC(LAYOUTGET, enc_layoutget, dec_layoutget), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index df101d9f546..903908a2002 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -3,9 +3,10 @@ * * Allow an NFS filesystem to be mounted as root. The way this works is: * (1) Use the IP autoconfig mechanism to set local IP addresses and routes. - * (2) Handle RPC negotiation with the system which replied to RARP or - * was reported as a boot server by BOOTP or manually. - * (3) The actual mounting is done later, when init() is running. + * (2) Construct the device string and the options string using DHCP + * option 17 and/or kernel command line options. + * (3) When mount_root() sets up the root file system, pass these strings + * to the NFS client's regular mount interface via sys_mount(). * * * Changes: @@ -65,470 +66,245 @@ * Hua Qin : Support for mounting root file system via * NFS over TCP. * Fabian Frederick: Option parser rebuilt (using parser lib) -*/ + * Chuck Lever : Use super.c's text-based mount option parsing + * Chuck Lever : Add "nfsrootdebug". + */ #include <linux/types.h> #include <linux/string.h> -#include <linux/kernel.h> -#include <linux/time.h> -#include <linux/fs.h> #include <linux/init.h> -#include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/xprtsock.h> #include <linux/nfs.h> #include <linux/nfs_fs.h> -#include <linux/nfs_mount.h> -#include <linux/in.h> -#include <linux/major.h> #include <linux/utsname.h> -#include <linux/inet.h> #include <linux/root_dev.h> #include <net/ipconfig.h> -#include <linux/parser.h> #include "internal.h" -/* Define this to allow debugging output */ -#undef NFSROOT_DEBUG #define NFSDBG_FACILITY NFSDBG_ROOT -/* Default port to use if server is not running a portmapper */ -#define NFS_MNT_PORT 627 - /* Default path we try to mount. "%s" gets replaced by our IP address */ #define NFS_ROOT "/tftpboot/%s" /* Parameters passed from the kernel command line */ -static char nfs_root_name[256] __initdata = ""; +static char nfs_root_parms[256] __initdata = ""; + +/* Text-based mount options passed to super.c */ +static char nfs_root_options[256] __initdata = ""; /* Address of NFS server */ -static __be32 servaddr __initdata = 0; +static __be32 servaddr __initdata = htonl(INADDR_NONE); /* Name of directory to mount */ -static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = { 0, }; - -/* NFS-related data */ -static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */ -static int nfs_port __initdata = 0; /* Port to connect to for NFS */ -static int mount_port __initdata = 0; /* Mount daemon port number */ - - -/*************************************************************************** - - Parsing of options - - ***************************************************************************/ - -enum { - /* Options that take integer arguments */ - Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin, - Opt_acregmax, Opt_acdirmin, Opt_acdirmax, - /* Options that take no arguments */ - Opt_soft, Opt_hard, Opt_intr, - Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, - Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp, - Opt_acl, Opt_noacl, - /* Error token */ - Opt_err -}; - -static const match_table_t tokens __initconst = { - {Opt_port, "port=%u"}, - {Opt_rsize, "rsize=%u"}, - {Opt_wsize, "wsize=%u"}, - {Opt_timeo, "timeo=%u"}, - {Opt_retrans, "retrans=%u"}, - {Opt_acregmin, "acregmin=%u"}, - {Opt_acregmax, "acregmax=%u"}, - {Opt_acdirmin, "acdirmin=%u"}, - {Opt_acdirmax, "acdirmax=%u"}, - {Opt_soft, "soft"}, - {Opt_hard, "hard"}, - {Opt_intr, "intr"}, - {Opt_nointr, "nointr"}, - {Opt_posix, "posix"}, - {Opt_noposix, "noposix"}, - {Opt_cto, "cto"}, - {Opt_nocto, "nocto"}, - {Opt_ac, "ac"}, - {Opt_noac, "noac"}, - {Opt_lock, "lock"}, - {Opt_nolock, "nolock"}, - {Opt_v2, "nfsvers=2"}, - {Opt_v2, "v2"}, - {Opt_v3, "nfsvers=3"}, - {Opt_v3, "v3"}, - {Opt_udp, "proto=udp"}, - {Opt_udp, "udp"}, - {Opt_tcp, "proto=tcp"}, - {Opt_tcp, "tcp"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_err, NULL} - -}; +static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = ""; +/* server:export path string passed to super.c */ +static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = ""; + +#ifdef RPC_DEBUG /* - * Parse option string. + * When the "nfsrootdebug" kernel command line option is specified, + * enable debugging messages for NFSROOT. */ - -static int __init root_nfs_parse(char *name, char *buf) +static int __init nfs_root_debug(char *__unused) { - - char *p; - substring_t args[MAX_OPT_ARGS]; - int option; - - if (!name) - return 1; - - /* Set the NFS remote path */ - p = strsep(&name, ","); - if (p[0] != '\0' && strcmp(p, "default") != 0) - strlcpy(buf, p, NFS_MAXPATHLEN); - - while ((p = strsep (&name, ",")) != NULL) { - int token; - if (!*p) - continue; - token = match_token(p, tokens, args); - - /* %u tokens only. Beware if you add new tokens! */ - if (token < Opt_soft && match_int(&args[0], &option)) - return 0; - switch (token) { - case Opt_port: - nfs_port = option; - break; - case Opt_rsize: - nfs_data.rsize = option; - break; - case Opt_wsize: - nfs_data.wsize = option; - break; - case Opt_timeo: - nfs_data.timeo = option; - break; - case Opt_retrans: - nfs_data.retrans = option; - break; - case Opt_acregmin: - nfs_data.acregmin = option; - break; - case Opt_acregmax: - nfs_data.acregmax = option; - break; - case Opt_acdirmin: - nfs_data.acdirmin = option; - break; - case Opt_acdirmax: - nfs_data.acdirmax = option; - break; - case Opt_soft: - nfs_data.flags |= NFS_MOUNT_SOFT; - break; - case Opt_hard: - nfs_data.flags &= ~NFS_MOUNT_SOFT; - break; - case Opt_intr: - case Opt_nointr: - break; - case Opt_posix: - nfs_data.flags |= NFS_MOUNT_POSIX; - break; - case Opt_noposix: - nfs_data.flags &= ~NFS_MOUNT_POSIX; - break; - case Opt_cto: - nfs_data.flags &= ~NFS_MOUNT_NOCTO; - break; - case Opt_nocto: - nfs_data.flags |= NFS_MOUNT_NOCTO; - break; - case Opt_ac: - nfs_data.flags &= ~NFS_MOUNT_NOAC; - break; - case Opt_noac: - nfs_data.flags |= NFS_MOUNT_NOAC; - break; - case Opt_lock: - nfs_data.flags &= ~NFS_MOUNT_NONLM; - break; - case Opt_nolock: - nfs_data.flags |= NFS_MOUNT_NONLM; - break; - case Opt_v2: - nfs_data.flags &= ~NFS_MOUNT_VER3; - break; - case Opt_v3: - nfs_data.flags |= NFS_MOUNT_VER3; - break; - case Opt_udp: - nfs_data.flags &= ~NFS_MOUNT_TCP; - break; - case Opt_tcp: - nfs_data.flags |= NFS_MOUNT_TCP; - break; - case Opt_acl: - nfs_data.flags &= ~NFS_MOUNT_NOACL; - break; - case Opt_noacl: - nfs_data.flags |= NFS_MOUNT_NOACL; - break; - default: - printk(KERN_WARNING "Root-NFS: unknown " - "option: %s\n", p); - return 0; - } - } - + nfs_debug |= NFSDBG_ROOT | NFSDBG_MOUNT; return 1; } +__setup("nfsrootdebug", nfs_root_debug); +#endif + /* - * Prepare the NFS data structure and parse all options. + * Parse NFS server and directory information passed on the kernel + * command line. + * + * nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>] + * + * If there is a "%s" token in the <root-dir> string, it is replaced + * by the ASCII-representation of the client's IP address. */ -static int __init root_nfs_name(char *name) +static int __init nfs_root_setup(char *line) { - static char buf[NFS_MAXPATHLEN] __initdata; - char *cp; - - /* Set some default values */ - memset(&nfs_data, 0, sizeof(nfs_data)); - nfs_port = -1; - nfs_data.version = NFS_MOUNT_VERSION; - nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ - nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; - nfs_data.wsize = NFS_DEF_FILE_IO_SIZE; - nfs_data.acregmin = NFS_DEF_ACREGMIN; - nfs_data.acregmax = NFS_DEF_ACREGMAX; - nfs_data.acdirmin = NFS_DEF_ACDIRMIN; - nfs_data.acdirmax = NFS_DEF_ACDIRMAX; - strcpy(buf, NFS_ROOT); - - /* Process options received from the remote server */ - root_nfs_parse(root_server_path, buf); - - /* Override them by options set on kernel command-line */ - root_nfs_parse(name, buf); - - cp = utsname()->nodename; - if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { - printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); - return -1; + ROOT_DEV = Root_NFS; + + if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { + strlcpy(nfs_root_parms, line, sizeof(nfs_root_parms)); + } else { + size_t n = strlen(line) + sizeof(NFS_ROOT) - 1; + if (n >= sizeof(nfs_root_parms)) + line[sizeof(nfs_root_parms) - sizeof(NFS_ROOT) - 2] = '\0'; + sprintf(nfs_root_parms, NFS_ROOT, line); } - sprintf(nfs_export_path, buf, cp); + + /* + * Extract the IP address of the NFS server containing our + * root file system, if one was specified. + * + * Note: root_nfs_parse_addr() removes the server-ip from + * nfs_root_parms, if it exists. + */ + root_server_addr = root_nfs_parse_addr(nfs_root_parms); return 1; } +__setup("nfsroot=", nfs_root_setup); -/* - * Get NFS server address. - */ -static int __init root_nfs_addr(void) +static int __init root_nfs_copy(char *dest, const char *src, + const size_t destlen) { - if ((servaddr = root_server_addr) == htonl(INADDR_NONE)) { - printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n"); + if (strlcpy(dest, src, destlen) > destlen) return -1; - } + return 0; +} - snprintf(nfs_data.hostname, sizeof(nfs_data.hostname), - "%pI4", &servaddr); +static int __init root_nfs_cat(char *dest, const char *src, + const size_t destlen) +{ + if (strlcat(dest, src, destlen) > destlen) + return -1; return 0; } /* - * Tell the user what's going on. + * Parse out root export path and mount options from + * passed-in string @incoming. + * + * Copy the export path into @exppath. */ -#ifdef NFSROOT_DEBUG -static void __init root_nfs_print(void) +static int __init root_nfs_parse_options(char *incoming, char *exppath, + const size_t exppathlen) { - printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n", - nfs_export_path, nfs_data.hostname); - printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n", - nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans); - printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n", - nfs_data.acregmin, nfs_data.acregmax, - nfs_data.acdirmin, nfs_data.acdirmax); - printk(KERN_NOTICE "Root-NFS: nfsd port = %d, mountd port = %d, flags = %08x\n", - nfs_port, mount_port, nfs_data.flags); -} -#endif - + char *p; -static int __init root_nfs_init(void) -{ -#ifdef NFSROOT_DEBUG - nfs_debug |= NFSDBG_ROOT; -#endif + /* + * Set the NFS remote path + */ + p = strsep(&incoming, ","); + if (*p != '\0' && strcmp(p, "default") != 0) + if (root_nfs_copy(exppath, p, exppathlen)) + return -1; /* - * Decode the root directory path name and NFS options from - * the kernel command line. This has to go here in order to - * be able to use the client IP address for the remote root - * directory (necessary for pure RARP booting). + * @incoming now points to the rest of the string; if it + * contains something, append it to our root options buffer */ - if (root_nfs_name(nfs_root_name) < 0 || - root_nfs_addr() < 0) - return -1; + if (incoming != NULL && *incoming != '\0') + if (root_nfs_cat(nfs_root_options, incoming, + sizeof(nfs_root_options))) + return -1; -#ifdef NFSROOT_DEBUG - root_nfs_print(); -#endif + /* + * Possibly prepare for more options to be appended + */ + if (nfs_root_options[0] != '\0' && + nfs_root_options[strlen(nfs_root_options)] != ',') + if (root_nfs_cat(nfs_root_options, ",", + sizeof(nfs_root_options))) + return -1; return 0; } - /* - * Parse NFS server and directory information passed on the kernel - * command line. + * Decode the export directory path name and NFS options from + * the kernel command line. This has to be done late in order to + * use a dynamically acquired client IP address for the remote + * root directory path. + * + * Returns zero if successful; otherwise -1 is returned. */ -static int __init nfs_root_setup(char *line) +static int __init root_nfs_data(char *cmdline) { - ROOT_DEV = Root_NFS; - if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { - strlcpy(nfs_root_name, line, sizeof(nfs_root_name)); - } else { - int n = strlen(line) + sizeof(NFS_ROOT) - 1; - if (n >= sizeof(nfs_root_name)) - line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0'; - sprintf(nfs_root_name, NFS_ROOT, line); + char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1]; + int len, retval = -1; + char *tmp = NULL; + const size_t tmplen = sizeof(nfs_export_path); + + tmp = kzalloc(tmplen, GFP_KERNEL); + if (tmp == NULL) + goto out_nomem; + strcpy(tmp, NFS_ROOT); + + if (root_server_path[0] != '\0') { + dprintk("Root-NFS: DHCPv4 option 17: %s\n", + root_server_path); + if (root_nfs_parse_options(root_server_path, tmp, tmplen)) + goto out_optionstoolong; } - root_server_addr = root_nfs_parse_addr(nfs_root_name); - return 1; -} - -__setup("nfsroot=", nfs_root_setup); - -/*************************************************************************** - Routines to actually mount the root directory + if (cmdline[0] != '\0') { + dprintk("Root-NFS: nfsroot=%s\n", cmdline); + if (root_nfs_parse_options(cmdline, tmp, tmplen)) + goto out_optionstoolong; + } - ***************************************************************************/ + /* + * Append mandatory options for nfsroot so they override + * what has come before + */ + snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4", + &servaddr); + if (root_nfs_cat(nfs_root_options, addr_option, + sizeof(nfs_root_options))) + goto out_optionstoolong; -/* - * Construct sockaddr_in from address and port number. - */ -static inline void -set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port) -{ - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = addr; - sin->sin_port = port; -} + /* + * Set up nfs_root_device. For NFS mounts, this looks like + * + * server:/path + * + * At this point, utsname()->nodename contains our local + * IP address or hostname, set by ipconfig. If "%s" exists + * in tmp, substitute the nodename, then shovel the whole + * mess into nfs_root_device. + */ + len = snprintf(nfs_export_path, sizeof(nfs_export_path), + tmp, utsname()->nodename); + if (len > (int)sizeof(nfs_export_path)) + goto out_devnametoolong; + len = snprintf(nfs_root_device, sizeof(nfs_root_device), + "%pI4:%s", &servaddr, nfs_export_path); + if (len > (int)sizeof(nfs_root_device)) + goto out_devnametoolong; -/* - * Query server portmapper for the port of a daemon program. - */ -static int __init root_nfs_getport(int program, int version, int proto) -{ - struct sockaddr_in sin; + retval = 0; - printk(KERN_NOTICE "Looking up port of RPC %d/%d on %pI4\n", - program, version, &servaddr); - set_sockaddr(&sin, servaddr, 0); - return rpcb_getport_sync(&sin, program, version, proto); +out: + kfree(tmp); + return retval; +out_nomem: + printk(KERN_ERR "Root-NFS: could not allocate memory\n"); + goto out; +out_optionstoolong: + printk(KERN_ERR "Root-NFS: mount options string too long\n"); + goto out; +out_devnametoolong: + printk(KERN_ERR "Root-NFS: root device name too long.\n"); + goto out; } - -/* - * Use portmapper to find mountd and nfsd port numbers if not overriden - * by the user. Use defaults if portmapper is not available. - * XXX: Is there any nfs server with no portmapper? +/** + * nfs_root_data - Return prepared 'data' for NFSROOT mount + * @root_device: OUT: address of string containing NFSROOT device + * @root_data: OUT: address of string containing NFSROOT mount options + * + * Returns zero and sets @root_device and @root_data if successful, + * otherwise -1 is returned. */ -static int __init root_nfs_ports(void) +int __init nfs_root_data(char **root_device, char **root_data) { - int port; - int nfsd_ver, mountd_ver; - int nfsd_port, mountd_port; - int proto; - - if (nfs_data.flags & NFS_MOUNT_VER3) { - nfsd_ver = NFS3_VERSION; - mountd_ver = NFS_MNT3_VERSION; - nfsd_port = NFS_PORT; - mountd_port = NFS_MNT_PORT; - } else { - nfsd_ver = NFS2_VERSION; - mountd_ver = NFS_MNT_VERSION; - nfsd_port = NFS_PORT; - mountd_port = NFS_MNT_PORT; - } - - proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; - - if (nfs_port < 0) { - if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) { - printk(KERN_ERR "Root-NFS: Unable to get nfsd port " - "number from server, using default\n"); - port = nfsd_port; - } - nfs_port = port; - dprintk("Root-NFS: Portmapper on server returned %d " - "as nfsd port\n", port); + servaddr = root_server_addr; + if (servaddr == htonl(INADDR_NONE)) { + printk(KERN_ERR "Root-NFS: no NFS server address\n"); + return -1; } - if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) { - printk(KERN_ERR "Root-NFS: Unable to get mountd port " - "number from server, using default\n"); - port = mountd_port; - } - mount_port = port; - dprintk("Root-NFS: mountd port is %d\n", port); + if (root_nfs_data(nfs_root_parms) < 0) + return -1; + *root_device = nfs_root_device; + *root_data = nfs_root_options; return 0; } - - -/* - * Get a file handle from the server for the directory which is to be - * mounted. - */ -static int __init root_nfs_get_handle(void) -{ - struct sockaddr_in sin; - unsigned int auth_flav_len = 0; - struct nfs_mount_request request = { - .sap = (struct sockaddr *)&sin, - .salen = sizeof(sin), - .dirpath = nfs_export_path, - .version = (nfs_data.flags & NFS_MOUNT_VER3) ? - NFS_MNT3_VERSION : NFS_MNT_VERSION, - .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? - XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP, - .auth_flav_len = &auth_flav_len, - }; - int status = -ENOMEM; - - request.fh = nfs_alloc_fhandle(); - if (!request.fh) - goto out; - set_sockaddr(&sin, servaddr, htons(mount_port)); - status = nfs_mount(&request); - if (status < 0) - printk(KERN_ERR "Root-NFS: Server returned error %d " - "while mounting %s\n", status, nfs_export_path); - else { - nfs_data.root.size = request.fh->size; - memcpy(&nfs_data.root.data, request.fh->data, request.fh->size); - } - nfs_free_fhandle(request.fh); -out: - return status; -} - -/* - * Get the NFS port numbers and file handle, and return the prepared 'data' - * argument for mount() if everything went OK. Return NULL otherwise. - */ -void * __init nfs_root_data(void) -{ - if (root_nfs_init() < 0 - || root_nfs_ports() < 0 - || root_nfs_get_handle() < 0) - return NULL; - set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, htons(nfs_port)); - return (void*)&nfs_data; -} diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 919490232e1..e1164e3f9e6 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -26,12 +26,9 @@ static struct kmem_cache *nfs_page_cachep; static inline struct nfs_page * nfs_page_alloc(void) { - struct nfs_page *p; - p = kmem_cache_alloc(nfs_page_cachep, GFP_KERNEL); - if (p) { - memset(p, 0, sizeof(*p)); + struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL); + if (p) INIT_LIST_HEAD(&p->wb_list); - } return p; } @@ -65,6 +62,13 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, if (req == NULL) return ERR_PTR(-ENOMEM); + /* get lock context early so we can deal with alloc failures */ + req->wb_lock_context = nfs_get_lock_context(ctx); + if (req->wb_lock_context == NULL) { + nfs_page_free(req); + return ERR_PTR(-ENOMEM); + } + /* Initialize the request struct. Initially, we assume a * long write-back delay. This will be adjusted in * update_nfs_request below if the region is not locked. */ @@ -79,7 +83,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, req->wb_pgbase = offset; req->wb_bytes = count; req->wb_context = get_nfs_open_context(ctx); - req->wb_lock_context = nfs_get_lock_context(ctx); kref_init(&req->wb_kref); return req; } @@ -109,7 +112,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req) { if (!nfs_lock_request_dontget(req)) return 0; - if (req->wb_page != NULL) + if (test_bit(PG_MAPPED, &req->wb_flags)) radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); return 1; } @@ -119,7 +122,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req) */ void nfs_clear_page_tag_locked(struct nfs_page *req) { - if (req->wb_page != NULL) { + if (test_bit(PG_MAPPED, &req->wb_flags)) { struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c new file mode 100644 index 00000000000..1b1bc1a0fb0 --- /dev/null +++ b/fs/nfs/pnfs.c @@ -0,0 +1,965 @@ +/* + * pNFS functions to call and manage layout drivers. + * + * Copyright (c) 2002 [year of first publication] + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand <dhildebz@umich.edu> + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#include <linux/nfs_fs.h> +#include "internal.h" +#include "pnfs.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS + +/* Locking: + * + * pnfs_spinlock: + * protects pnfs_modules_tbl. + */ +static DEFINE_SPINLOCK(pnfs_spinlock); + +/* + * pnfs_modules_tbl holds all pnfs modules + */ +static LIST_HEAD(pnfs_modules_tbl); + +/* Return the registered pnfs layout driver module matching given id */ +static struct pnfs_layoutdriver_type * +find_pnfs_driver_locked(u32 id) +{ + struct pnfs_layoutdriver_type *local; + + list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) + if (local->id == id) + goto out; + local = NULL; +out: + dprintk("%s: Searching for id %u, found %p\n", __func__, id, local); + return local; +} + +static struct pnfs_layoutdriver_type * +find_pnfs_driver(u32 id) +{ + struct pnfs_layoutdriver_type *local; + + spin_lock(&pnfs_spinlock); + local = find_pnfs_driver_locked(id); + spin_unlock(&pnfs_spinlock); + return local; +} + +void +unset_pnfs_layoutdriver(struct nfs_server *nfss) +{ + if (nfss->pnfs_curr_ld) { + nfss->pnfs_curr_ld->clear_layoutdriver(nfss); + module_put(nfss->pnfs_curr_ld->owner); + } + nfss->pnfs_curr_ld = NULL; +} + +/* + * Try to set the server's pnfs module to the pnfs layout type specified by id. + * Currently only one pNFS layout driver per filesystem is supported. + * + * @id layout type. Zero (illegal layout type) indicates pNFS not in use. + */ +void +set_pnfs_layoutdriver(struct nfs_server *server, u32 id) +{ + struct pnfs_layoutdriver_type *ld_type = NULL; + + if (id == 0) + goto out_no_driver; + if (!(server->nfs_client->cl_exchange_flags & + (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { + printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__, + id, server->nfs_client->cl_exchange_flags); + goto out_no_driver; + } + ld_type = find_pnfs_driver(id); + if (!ld_type) { + request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id); + ld_type = find_pnfs_driver(id); + if (!ld_type) { + dprintk("%s: No pNFS module found for %u.\n", + __func__, id); + goto out_no_driver; + } + } + if (!try_module_get(ld_type->owner)) { + dprintk("%s: Could not grab reference on module\n", __func__); + goto out_no_driver; + } + server->pnfs_curr_ld = ld_type; + if (ld_type->set_layoutdriver(server)) { + printk(KERN_ERR + "%s: Error initializing mount point for layout driver %u.\n", + __func__, id); + module_put(ld_type->owner); + goto out_no_driver; + } + dprintk("%s: pNFS module for %u set\n", __func__, id); + return; + +out_no_driver: + dprintk("%s: Using NFSv4 I/O\n", __func__); + server->pnfs_curr_ld = NULL; +} + +int +pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) +{ + int status = -EINVAL; + struct pnfs_layoutdriver_type *tmp; + + if (ld_type->id == 0) { + printk(KERN_ERR "%s id 0 is reserved\n", __func__); + return status; + } + if (!ld_type->alloc_lseg || !ld_type->free_lseg) { + printk(KERN_ERR "%s Layout driver must provide " + "alloc_lseg and free_lseg.\n", __func__); + return status; + } + + spin_lock(&pnfs_spinlock); + tmp = find_pnfs_driver_locked(ld_type->id); + if (!tmp) { + list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl); + status = 0; + dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, + ld_type->name); + } else { + printk(KERN_ERR "%s Module with id %d already loaded!\n", + __func__, ld_type->id); + } + spin_unlock(&pnfs_spinlock); + + return status; +} +EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver); + +void +pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type) +{ + dprintk("%s Deregistering id:%u\n", __func__, ld_type->id); + spin_lock(&pnfs_spinlock); + list_del(&ld_type->pnfs_tblid); + spin_unlock(&pnfs_spinlock); +} +EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); + +/* + * pNFS client layout cache + */ + +/* Need to hold i_lock if caller does not already hold reference */ +void +get_layout_hdr(struct pnfs_layout_hdr *lo) +{ + atomic_inc(&lo->plh_refcount); +} + +static void +destroy_layout_hdr(struct pnfs_layout_hdr *lo) +{ + dprintk("%s: freeing layout cache %p\n", __func__, lo); + BUG_ON(!list_empty(&lo->plh_layouts)); + NFS_I(lo->plh_inode)->layout = NULL; + kfree(lo); +} + +static void +put_layout_hdr_locked(struct pnfs_layout_hdr *lo) +{ + if (atomic_dec_and_test(&lo->plh_refcount)) + destroy_layout_hdr(lo); +} + +void +put_layout_hdr(struct pnfs_layout_hdr *lo) +{ + struct inode *inode = lo->plh_inode; + + if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { + destroy_layout_hdr(lo); + spin_unlock(&inode->i_lock); + } +} + +static void +init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) +{ + INIT_LIST_HEAD(&lseg->pls_list); + atomic_set(&lseg->pls_refcount, 1); + smp_mb(); + set_bit(NFS_LSEG_VALID, &lseg->pls_flags); + lseg->pls_layout = lo; +} + +static void free_lseg(struct pnfs_layout_segment *lseg) +{ + struct inode *ino = lseg->pls_layout->plh_inode; + + NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); + /* Matched by get_layout_hdr in pnfs_insert_layout */ + put_layout_hdr(NFS_I(ino)->layout); +} + +/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg + * could sleep, so must be called outside of the lock. + * Returns 1 if object was removed, otherwise return 0. + */ +static int +put_lseg_locked(struct pnfs_layout_segment *lseg, + struct list_head *tmp_list) +{ + dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, + atomic_read(&lseg->pls_refcount), + test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); + if (atomic_dec_and_test(&lseg->pls_refcount)) { + struct inode *ino = lseg->pls_layout->plh_inode; + + BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); + list_del(&lseg->pls_list); + if (list_empty(&lseg->pls_layout->plh_segs)) { + struct nfs_client *clp; + + clp = NFS_SERVER(ino)->nfs_client; + spin_lock(&clp->cl_lock); + /* List does not take a reference, so no need for put here */ + list_del_init(&lseg->pls_layout->plh_layouts); + spin_unlock(&clp->cl_lock); + clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags); + } + rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq); + list_add(&lseg->pls_list, tmp_list); + return 1; + } + return 0; +} + +static bool +should_free_lseg(u32 lseg_iomode, u32 recall_iomode) +{ + return (recall_iomode == IOMODE_ANY || + lseg_iomode == recall_iomode); +} + +/* Returns 1 if lseg is removed from list, 0 otherwise */ +static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, + struct list_head *tmp_list) +{ + int rv = 0; + + if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { + /* Remove the reference keeping the lseg in the + * list. It will now be removed when all + * outstanding io is finished. + */ + rv = put_lseg_locked(lseg, tmp_list); + } + return rv; +} + +/* Returns count of number of matching invalid lsegs remaining in list + * after call. + */ +int +mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, + struct list_head *tmp_list, + u32 iomode) +{ + struct pnfs_layout_segment *lseg, *next; + int invalid = 0, removed = 0; + + dprintk("%s:Begin lo %p\n", __func__, lo); + + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) + if (should_free_lseg(lseg->pls_range.iomode, iomode)) { + dprintk("%s: freeing lseg %p iomode %d " + "offset %llu length %llu\n", __func__, + lseg, lseg->pls_range.iomode, lseg->pls_range.offset, + lseg->pls_range.length); + invalid++; + removed += mark_lseg_invalid(lseg, tmp_list); + } + dprintk("%s:Return %i\n", __func__, invalid - removed); + return invalid - removed; +} + +void +pnfs_free_lseg_list(struct list_head *free_me) +{ + struct pnfs_layout_segment *lseg, *tmp; + + list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { + list_del(&lseg->pls_list); + free_lseg(lseg); + } +} + +void +pnfs_destroy_layout(struct nfs_inode *nfsi) +{ + struct pnfs_layout_hdr *lo; + LIST_HEAD(tmp_list); + + spin_lock(&nfsi->vfs_inode.i_lock); + lo = nfsi->layout; + if (lo) { + set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags); + mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY); + /* Matched by refcount set to 1 in alloc_init_layout_hdr */ + put_layout_hdr_locked(lo); + } + spin_unlock(&nfsi->vfs_inode.i_lock); + pnfs_free_lseg_list(&tmp_list); +} + +/* + * Called by the state manger to remove all layouts established under an + * expired lease. + */ +void +pnfs_destroy_all_layouts(struct nfs_client *clp) +{ + struct pnfs_layout_hdr *lo; + LIST_HEAD(tmp_list); + + spin_lock(&clp->cl_lock); + list_splice_init(&clp->cl_layouts, &tmp_list); + spin_unlock(&clp->cl_lock); + + while (!list_empty(&tmp_list)) { + lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, + plh_layouts); + dprintk("%s freeing layout for inode %lu\n", __func__, + lo->plh_inode->i_ino); + pnfs_destroy_layout(NFS_I(lo->plh_inode)); + } +} + +/* update lo->plh_stateid with new if is more recent */ +void +pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, + bool update_barrier) +{ + u32 oldseq, newseq; + + oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid); + newseq = be32_to_cpu(new->stateid.seqid); + if ((int)(newseq - oldseq) > 0) { + memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid)); + if (update_barrier) { + u32 new_barrier = be32_to_cpu(new->stateid.seqid); + + if ((int)(new_barrier - lo->plh_barrier)) + lo->plh_barrier = new_barrier; + } else { + /* Because of wraparound, we want to keep the barrier + * "close" to the current seqids. It needs to be + * within 2**31 to count as "behind", so if it + * gets too near that limit, give us a litle leeway + * and bring it to within 2**30. + * NOTE - and yes, this is all unsigned arithmetic. + */ + if (unlikely((newseq - lo->plh_barrier) > (3 << 29))) + lo->plh_barrier = newseq - (1 << 30); + } + } +} + +/* lget is set to 1 if called from inside send_layoutget call chain */ +static bool +pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, + int lget) +{ + if ((stateid) && + (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) + return true; + return lo->plh_block_lgets || + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || + (list_empty(&lo->plh_segs) && + (atomic_read(&lo->plh_outstanding) > lget)); +} + +int +pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, + struct nfs4_state *open_state) +{ + int status = 0; + + dprintk("--> %s\n", __func__); + spin_lock(&lo->plh_inode->i_lock); + if (pnfs_layoutgets_blocked(lo, NULL, 1)) { + status = -EAGAIN; + } else if (list_empty(&lo->plh_segs)) { + int seq; + + do { + seq = read_seqbegin(&open_state->seqlock); + memcpy(dst->data, open_state->stateid.data, + sizeof(open_state->stateid.data)); + } while (read_seqretry(&open_state->seqlock, seq)); + } else + memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data)); + spin_unlock(&lo->plh_inode->i_lock); + dprintk("<-- %s\n", __func__); + return status; +} + +/* +* Get layout from server. +* for now, assume that whole file layouts are requested. +* arg->offset: 0 +* arg->length: all ones +*/ +static struct pnfs_layout_segment * +send_layoutget(struct pnfs_layout_hdr *lo, + struct nfs_open_context *ctx, + u32 iomode) +{ + struct inode *ino = lo->plh_inode; + struct nfs_server *server = NFS_SERVER(ino); + struct nfs4_layoutget *lgp; + struct pnfs_layout_segment *lseg = NULL; + + dprintk("--> %s\n", __func__); + + BUG_ON(ctx == NULL); + lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); + if (lgp == NULL) + return NULL; + lgp->args.minlength = NFS4_MAX_UINT64; + lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; + lgp->args.range.iomode = iomode; + lgp->args.range.offset = 0; + lgp->args.range.length = NFS4_MAX_UINT64; + lgp->args.type = server->pnfs_curr_ld->id; + lgp->args.inode = ino; + lgp->args.ctx = get_nfs_open_context(ctx); + lgp->lsegpp = &lseg; + + /* Synchronously retrieve layout information from server and + * store in lseg. + */ + nfs4_proc_layoutget(lgp); + if (!lseg) { + /* remember that LAYOUTGET failed and suspend trying */ + set_bit(lo_fail_bit(iomode), &lo->plh_flags); + } + return lseg; +} + +bool pnfs_roc(struct inode *ino) +{ + struct pnfs_layout_hdr *lo; + struct pnfs_layout_segment *lseg, *tmp; + LIST_HEAD(tmp_list); + bool found = false; + + spin_lock(&ino->i_lock); + lo = NFS_I(ino)->layout; + if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) || + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) + goto out_nolayout; + list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) + if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { + mark_lseg_invalid(lseg, &tmp_list); + found = true; + } + if (!found) + goto out_nolayout; + lo->plh_block_lgets++; + get_layout_hdr(lo); /* matched in pnfs_roc_release */ + spin_unlock(&ino->i_lock); + pnfs_free_lseg_list(&tmp_list); + return true; + +out_nolayout: + spin_unlock(&ino->i_lock); + return false; +} + +void pnfs_roc_release(struct inode *ino) +{ + struct pnfs_layout_hdr *lo; + + spin_lock(&ino->i_lock); + lo = NFS_I(ino)->layout; + lo->plh_block_lgets--; + put_layout_hdr_locked(lo); + spin_unlock(&ino->i_lock); +} + +void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) +{ + struct pnfs_layout_hdr *lo; + + spin_lock(&ino->i_lock); + lo = NFS_I(ino)->layout; + if ((int)(barrier - lo->plh_barrier) > 0) + lo->plh_barrier = barrier; + spin_unlock(&ino->i_lock); +} + +bool pnfs_roc_drain(struct inode *ino, u32 *barrier) +{ + struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_segment *lseg; + bool found = false; + + spin_lock(&ino->i_lock); + list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) + if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { + found = true; + break; + } + if (!found) { + struct pnfs_layout_hdr *lo = nfsi->layout; + u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid); + + /* Since close does not return a layout stateid for use as + * a barrier, we choose the worst-case barrier. + */ + *barrier = current_seqid + atomic_read(&lo->plh_outstanding); + } + spin_unlock(&ino->i_lock); + return found; +} + +/* + * Compare two layout segments for sorting into layout cache. + * We want to preferentially return RW over RO layouts, so ensure those + * are seen first. + */ +static s64 +cmp_layout(u32 iomode1, u32 iomode2) +{ + /* read > read/write */ + return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ); +} + +static void +pnfs_insert_layout(struct pnfs_layout_hdr *lo, + struct pnfs_layout_segment *lseg) +{ + struct pnfs_layout_segment *lp; + int found = 0; + + dprintk("%s:Begin\n", __func__); + + assert_spin_locked(&lo->plh_inode->i_lock); + list_for_each_entry(lp, &lo->plh_segs, pls_list) { + if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0) + continue; + list_add_tail(&lseg->pls_list, &lp->pls_list); + dprintk("%s: inserted lseg %p " + "iomode %d offset %llu length %llu before " + "lp %p iomode %d offset %llu length %llu\n", + __func__, lseg, lseg->pls_range.iomode, + lseg->pls_range.offset, lseg->pls_range.length, + lp, lp->pls_range.iomode, lp->pls_range.offset, + lp->pls_range.length); + found = 1; + break; + } + if (!found) { + list_add_tail(&lseg->pls_list, &lo->plh_segs); + dprintk("%s: inserted lseg %p " + "iomode %d offset %llu length %llu at tail\n", + __func__, lseg, lseg->pls_range.iomode, + lseg->pls_range.offset, lseg->pls_range.length); + } + get_layout_hdr(lo); + + dprintk("%s:Return\n", __func__); +} + +static struct pnfs_layout_hdr * +alloc_init_layout_hdr(struct inode *ino) +{ + struct pnfs_layout_hdr *lo; + + lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); + if (!lo) + return NULL; + atomic_set(&lo->plh_refcount, 1); + INIT_LIST_HEAD(&lo->plh_layouts); + INIT_LIST_HEAD(&lo->plh_segs); + INIT_LIST_HEAD(&lo->plh_bulk_recall); + lo->plh_inode = ino; + return lo; +} + +static struct pnfs_layout_hdr * +pnfs_find_alloc_layout(struct inode *ino) +{ + struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_hdr *new = NULL; + + dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); + + assert_spin_locked(&ino->i_lock); + if (nfsi->layout) { + if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags)) + return NULL; + else + return nfsi->layout; + } + spin_unlock(&ino->i_lock); + new = alloc_init_layout_hdr(ino); + spin_lock(&ino->i_lock); + + if (likely(nfsi->layout == NULL)) /* Won the race? */ + nfsi->layout = new; + else + kfree(new); + return nfsi->layout; +} + +/* + * iomode matching rules: + * iomode lseg match + * ----- ----- ----- + * ANY READ true + * ANY RW true + * RW READ false + * RW RW true + * READ READ true + * READ RW true + */ +static int +is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) +{ + return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW); +} + +/* + * lookup range in layout + */ +static struct pnfs_layout_segment * +pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) +{ + struct pnfs_layout_segment *lseg, *ret = NULL; + + dprintk("%s:Begin\n", __func__); + + assert_spin_locked(&lo->plh_inode->i_lock); + list_for_each_entry(lseg, &lo->plh_segs, pls_list) { + if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && + is_matching_lseg(lseg, iomode)) { + ret = lseg; + break; + } + if (cmp_layout(iomode, lseg->pls_range.iomode) > 0) + break; + } + + dprintk("%s:Return lseg %p ref %d\n", + __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0); + return ret; +} + +/* + * Layout segment is retreived from the server if not cached. + * The appropriate layout segment is referenced and returned to the caller. + */ +struct pnfs_layout_segment * +pnfs_update_layout(struct inode *ino, + struct nfs_open_context *ctx, + enum pnfs_iomode iomode) +{ + struct nfs_inode *nfsi = NFS_I(ino); + struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; + struct pnfs_layout_hdr *lo; + struct pnfs_layout_segment *lseg = NULL; + + if (!pnfs_enabled_sb(NFS_SERVER(ino))) + return NULL; + spin_lock(&ino->i_lock); + lo = pnfs_find_alloc_layout(ino); + if (lo == NULL) { + dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); + goto out_unlock; + } + + /* Do we even need to bother with this? */ + if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { + dprintk("%s matches recall, use MDS\n", __func__); + goto out_unlock; + } + /* Check to see if the layout for the given range already exists */ + lseg = pnfs_find_lseg(lo, iomode); + if (lseg) + goto out_unlock; + + /* if LAYOUTGET already failed once we don't try again */ + if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) + goto out_unlock; + + if (pnfs_layoutgets_blocked(lo, NULL, 0)) + goto out_unlock; + atomic_inc(&lo->plh_outstanding); + + get_layout_hdr(lo); + if (list_empty(&lo->plh_segs)) { + /* The lo must be on the clp list if there is any + * chance of a CB_LAYOUTRECALL(FILE) coming in. + */ + spin_lock(&clp->cl_lock); + BUG_ON(!list_empty(&lo->plh_layouts)); + list_add_tail(&lo->plh_layouts, &clp->cl_layouts); + spin_unlock(&clp->cl_lock); + } + spin_unlock(&ino->i_lock); + + lseg = send_layoutget(lo, ctx, iomode); + if (!lseg) { + spin_lock(&ino->i_lock); + if (list_empty(&lo->plh_segs)) { + spin_lock(&clp->cl_lock); + list_del_init(&lo->plh_layouts); + spin_unlock(&clp->cl_lock); + clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); + } + spin_unlock(&ino->i_lock); + } + atomic_dec(&lo->plh_outstanding); + put_layout_hdr(lo); +out: + dprintk("%s end, state 0x%lx lseg %p\n", __func__, + nfsi->layout->plh_flags, lseg); + return lseg; +out_unlock: + spin_unlock(&ino->i_lock); + goto out; +} + +int +pnfs_layout_process(struct nfs4_layoutget *lgp) +{ + struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; + struct nfs4_layoutget_res *res = &lgp->res; + struct pnfs_layout_segment *lseg; + struct inode *ino = lo->plh_inode; + struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; + int status = 0; + + /* Verify we got what we asked for. + * Note that because the xdr parsing only accepts a single + * element array, this can fail even if the server is behaving + * correctly. + */ + if (lgp->args.range.iomode > res->range.iomode || + res->range.offset != 0 || + res->range.length != NFS4_MAX_UINT64) { + status = -EINVAL; + goto out; + } + /* Inject layout blob into I/O device driver */ + lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); + if (!lseg || IS_ERR(lseg)) { + if (!lseg) + status = -ENOMEM; + else + status = PTR_ERR(lseg); + dprintk("%s: Could not allocate layout: error %d\n", + __func__, status); + goto out; + } + + spin_lock(&ino->i_lock); + if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { + dprintk("%s forget reply due to recall\n", __func__); + goto out_forget_reply; + } + + if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) { + dprintk("%s forget reply due to state\n", __func__); + goto out_forget_reply; + } + init_lseg(lo, lseg); + lseg->pls_range = res->range; + *lgp->lsegpp = lseg; + pnfs_insert_layout(lo, lseg); + + if (res->return_on_close) { + set_bit(NFS_LSEG_ROC, &lseg->pls_flags); + set_bit(NFS_LAYOUT_ROC, &lo->plh_flags); + } + + /* Done processing layoutget. Set the layout stateid */ + pnfs_set_layout_stateid(lo, &res->stateid, false); + spin_unlock(&ino->i_lock); +out: + return status; + +out_forget_reply: + spin_unlock(&ino->i_lock); + lseg->pls_layout = lo; + NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); + goto out; +} + +/* + * Device ID cache. Currently supports one layout type per struct nfs_client. + * Add layout type to the lookup key to expand to support multiple types. + */ +int +pnfs_alloc_init_deviceid_cache(struct nfs_client *clp, + void (*free_callback)(struct pnfs_deviceid_node *)) +{ + struct pnfs_deviceid_cache *c; + + c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL); + if (!c) + return -ENOMEM; + spin_lock(&clp->cl_lock); + if (clp->cl_devid_cache != NULL) { + atomic_inc(&clp->cl_devid_cache->dc_ref); + dprintk("%s [kref [%d]]\n", __func__, + atomic_read(&clp->cl_devid_cache->dc_ref)); + kfree(c); + } else { + /* kzalloc initializes hlists */ + spin_lock_init(&c->dc_lock); + atomic_set(&c->dc_ref, 1); + c->dc_free_callback = free_callback; + clp->cl_devid_cache = c; + dprintk("%s [new]\n", __func__); + } + spin_unlock(&clp->cl_lock); + return 0; +} +EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache); + +/* + * Called from pnfs_layoutdriver_type->free_lseg + * last layout segment reference frees deviceid + */ +void +pnfs_put_deviceid(struct pnfs_deviceid_cache *c, + struct pnfs_deviceid_node *devid) +{ + struct nfs4_deviceid *id = &devid->de_id; + struct pnfs_deviceid_node *d; + struct hlist_node *n; + long h = nfs4_deviceid_hash(id); + + dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref)); + if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock)) + return; + + hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node) + if (!memcmp(&d->de_id, id, sizeof(*id))) { + hlist_del_rcu(&d->de_node); + spin_unlock(&c->dc_lock); + synchronize_rcu(); + c->dc_free_callback(devid); + return; + } + spin_unlock(&c->dc_lock); + /* Why wasn't it found in the list? */ + BUG(); +} +EXPORT_SYMBOL_GPL(pnfs_put_deviceid); + +/* Find and reference a deviceid */ +struct pnfs_deviceid_node * +pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id) +{ + struct pnfs_deviceid_node *d; + struct hlist_node *n; + long hash = nfs4_deviceid_hash(id); + + dprintk("--> %s hash %ld\n", __func__, hash); + rcu_read_lock(); + hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) { + if (!memcmp(&d->de_id, id, sizeof(*id))) { + if (!atomic_inc_not_zero(&d->de_ref)) { + goto fail; + } else { + rcu_read_unlock(); + return d; + } + } + } +fail: + rcu_read_unlock(); + return NULL; +} +EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid); + +/* + * Add a deviceid to the cache. + * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new + */ +struct pnfs_deviceid_node * +pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new) +{ + struct pnfs_deviceid_node *d; + long hash = nfs4_deviceid_hash(&new->de_id); + + dprintk("--> %s hash %ld\n", __func__, hash); + spin_lock(&c->dc_lock); + d = pnfs_find_get_deviceid(c, &new->de_id); + if (d) { + spin_unlock(&c->dc_lock); + dprintk("%s [discard]\n", __func__); + c->dc_free_callback(new); + return d; + } + INIT_HLIST_NODE(&new->de_node); + atomic_set(&new->de_ref, 1); + hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]); + spin_unlock(&c->dc_lock); + dprintk("%s [new]\n", __func__); + return new; +} +EXPORT_SYMBOL_GPL(pnfs_add_deviceid); + +void +pnfs_put_deviceid_cache(struct nfs_client *clp) +{ + struct pnfs_deviceid_cache *local = clp->cl_devid_cache; + + dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref)); + if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { + int i; + /* Verify cache is empty */ + for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) + BUG_ON(!hlist_empty(&local->dc_deviceids[i])); + clp->cl_devid_cache = NULL; + spin_unlock(&clp->cl_lock); + kfree(local); + } +} +EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h new file mode 100644 index 00000000000..e2612ea0cbe --- /dev/null +++ b/fs/nfs/pnfs.h @@ -0,0 +1,235 @@ +/* + * pNFS client data structures. + * + * Copyright (c) 2002 + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand <dhildebz@umich.edu> + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#ifndef FS_NFS_PNFS_H +#define FS_NFS_PNFS_H + +enum { + NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ + NFS_LSEG_ROC, /* roc bit received from server */ +}; + +struct pnfs_layout_segment { + struct list_head pls_list; + struct pnfs_layout_range pls_range; + atomic_t pls_refcount; + unsigned long pls_flags; + struct pnfs_layout_hdr *pls_layout; +}; + +#ifdef CONFIG_NFS_V4_1 + +#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" + +enum { + NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ + NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ + NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ + NFS_LAYOUT_ROC, /* some lseg had roc bit set */ + NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ +}; + +/* Per-layout driver specific registration structure */ +struct pnfs_layoutdriver_type { + struct list_head pnfs_tblid; + const u32 id; + const char *name; + struct module *owner; + int (*set_layoutdriver) (struct nfs_server *); + int (*clear_layoutdriver) (struct nfs_server *); + struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); + void (*free_lseg) (struct pnfs_layout_segment *lseg); +}; + +struct pnfs_layout_hdr { + atomic_t plh_refcount; + struct list_head plh_layouts; /* other client layouts */ + struct list_head plh_bulk_recall; /* clnt list of bulk recalls */ + struct list_head plh_segs; /* layout segments list */ + nfs4_stateid plh_stateid; + atomic_t plh_outstanding; /* number of RPCs out */ + unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */ + u32 plh_barrier; /* ignore lower seqids */ + unsigned long plh_flags; + struct inode *plh_inode; +}; + +struct pnfs_device { + struct nfs4_deviceid dev_id; + unsigned int layout_type; + unsigned int mincount; + struct page **pages; + void *area; + unsigned int pgbase; + unsigned int pglen; +}; + +/* + * Device ID RCU cache. A device ID is unique per client ID and layout type. + */ +#define NFS4_DEVICE_ID_HASH_BITS 5 +#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) +#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) + +static inline u32 +nfs4_deviceid_hash(struct nfs4_deviceid *id) +{ + unsigned char *cptr = (unsigned char *)id->data; + unsigned int nbytes = NFS4_DEVICEID4_SIZE; + u32 x = 0; + + while (nbytes--) { + x *= 37; + x += *cptr++; + } + return x & NFS4_DEVICE_ID_HASH_MASK; +} + +struct pnfs_deviceid_node { + struct hlist_node de_node; + struct nfs4_deviceid de_id; + atomic_t de_ref; +}; + +struct pnfs_deviceid_cache { + spinlock_t dc_lock; + atomic_t dc_ref; + void (*dc_free_callback)(struct pnfs_deviceid_node *); + struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE]; +}; + +extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *, + void (*free_callback)(struct pnfs_deviceid_node *)); +extern void pnfs_put_deviceid_cache(struct nfs_client *); +extern struct pnfs_deviceid_node *pnfs_find_get_deviceid( + struct pnfs_deviceid_cache *, + struct nfs4_deviceid *); +extern struct pnfs_deviceid_node *pnfs_add_deviceid( + struct pnfs_deviceid_cache *, + struct pnfs_deviceid_node *); +extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c, + struct pnfs_deviceid_node *devid); + +extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); +extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); + +/* nfs4proc.c */ +extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, + struct pnfs_device *dev); +extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); + +/* pnfs.c */ +void get_layout_hdr(struct pnfs_layout_hdr *lo); +struct pnfs_layout_segment * +pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, + enum pnfs_iomode access_type); +void set_pnfs_layoutdriver(struct nfs_server *, u32 id); +void unset_pnfs_layoutdriver(struct nfs_server *); +int pnfs_layout_process(struct nfs4_layoutget *lgp); +void pnfs_free_lseg_list(struct list_head *tmp_list); +void pnfs_destroy_layout(struct nfs_inode *); +void pnfs_destroy_all_layouts(struct nfs_client *); +void put_layout_hdr(struct pnfs_layout_hdr *lo); +void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, + const nfs4_stateid *new, + bool update_barrier); +int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, + struct pnfs_layout_hdr *lo, + struct nfs4_state *open_state); +int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, + struct list_head *tmp_list, + u32 iomode); +bool pnfs_roc(struct inode *ino); +void pnfs_roc_release(struct inode *ino); +void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); +bool pnfs_roc_drain(struct inode *ino, u32 *barrier); + + +static inline int lo_fail_bit(u32 iomode) +{ + return iomode == IOMODE_RW ? + NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; +} + +/* Return true if a layout driver is being used for this mountpoint */ +static inline int pnfs_enabled_sb(struct nfs_server *nfss) +{ + return nfss->pnfs_curr_ld != NULL; +} + +#else /* CONFIG_NFS_V4_1 */ + +static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) +{ +} + +static inline void pnfs_destroy_layout(struct nfs_inode *nfsi) +{ +} + +static inline struct pnfs_layout_segment * +pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, + enum pnfs_iomode access_type) +{ + return NULL; +} + +static inline bool +pnfs_roc(struct inode *ino) +{ + return false; +} + +static inline void +pnfs_roc_release(struct inode *ino) +{ +} + +static inline void +pnfs_roc_set_barrier(struct inode *ino, u32 barrier) +{ +} + +static inline bool +pnfs_roc_drain(struct inode *ino, u32 *barrier) +{ + return false; +} + +static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id) +{ +} + +static inline void unset_pnfs_layoutdriver(struct nfs_server *s) +{ +} + +#endif /* CONFIG_NFS_V4_1 */ + +#endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 611bec22f55..77d5e21c4ad 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -258,7 +258,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data) static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nameidata *nd) + int flags, struct nfs_open_context *ctx) { struct nfs_createdata *data; struct rpc_message msg = { @@ -365,17 +365,32 @@ static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) return 1; } +static void +nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir) +{ + msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; +} + +static int +nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, + struct inode *new_dir) +{ + if (nfs_async_handle_expired_key(task)) + return 0; + nfs_mark_for_revalidate(old_dir); + nfs_mark_for_revalidate(new_dir); + return 1; +} + static int nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, struct inode *new_dir, struct qstr *new_name) { struct nfs_renameargs arg = { - .fromfh = NFS_FH(old_dir), - .fromname = old_name->name, - .fromlen = old_name->len, - .tofh = NFS_FH(new_dir), - .toname = new_name->name, - .tolen = new_name->len + .old_dir = NFS_FH(old_dir), + .old_name = old_name, + .new_dir = NFS_FH(new_dir), + .new_name = new_name, }; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_RENAME], @@ -443,7 +458,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, fattr = nfs_alloc_fattr(); status = -ENOMEM; if (fh == NULL || fattr == NULL) - goto out; + goto out_free; status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); @@ -456,6 +471,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, if (status == 0) status = nfs_instantiate(dentry, fh, fattr); +out_free: nfs_free_fattr(fattr); nfs_free_fhandle(fh); out: @@ -519,14 +535,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name) */ static int nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, - u64 cookie, struct page *page, unsigned int count, int plus) + u64 cookie, struct page **pages, unsigned int count, int plus) { struct inode *dir = dentry->d_inode; struct nfs_readdirargs arg = { .fh = NFS_FH(dir), .cookie = cookie, .count = count, - .pages = &page, + .pages = pages, }; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_READDIR], @@ -705,6 +721,8 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .unlink_setup = nfs_proc_unlink_setup, .unlink_done = nfs_proc_unlink_done, .rename = nfs_proc_rename, + .rename_setup = nfs_proc_rename_setup, + .rename_done = nfs_proc_rename_done, .link = nfs_proc_link, .symlink = nfs_proc_symlink, .mkdir = nfs_proc_mkdir, @@ -714,7 +732,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .statfs = nfs_proc_statfs, .fsinfo = nfs_proc_fsinfo, .pathconf = nfs_proc_pathconf, - .decode_dirent = nfs_decode_dirent, + .decode_dirent = nfs2_decode_dirent, .read_setup = nfs_proc_read_setup, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 87adc274424..aedcaa7f291 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -25,6 +25,7 @@ #include "internal.h" #include "iostat.h" #include "fscache.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -46,7 +47,6 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); p->npages = pagecount; - p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { @@ -121,6 +121,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); + pnfs_update_layout(inode, ctx, IOMODE_READ); new = nfs_create_request(ctx, inode, page, 0, len); if (IS_ERR(new)) { unlock_page(page); @@ -151,7 +152,6 @@ static void nfs_readpage_release(struct nfs_page *req) (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); - nfs_clear_request(req); nfs_release_request(req); } @@ -625,6 +625,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ + pnfs_update_layout(inode, desc.ctx, IOMODE_READ); if (rsize < PAGE_CACHE_SIZE) nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); else diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f4cbf0c306c..b68c8607770 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -39,7 +39,6 @@ #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> #include <linux/lockd/bind.h> -#include <linux/smp_lock.h> #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/mnt_namespace.h> @@ -67,6 +66,12 @@ #define NFSDBG_FACILITY NFSDBG_VFS +#ifdef CONFIG_NFS_V3 +#define NFS_DEFAULT_VERSION 3 +#else +#define NFS_DEFAULT_VERSION 2 +#endif + enum { /* Mount options that take no arguments */ Opt_soft, Opt_hard, @@ -100,6 +105,7 @@ enum { Opt_addr, Opt_mountaddr, Opt_clientaddr, Opt_lookupcache, Opt_fscache_uniq, + Opt_local_lock, /* Special mount options */ Opt_userspace, Opt_deprecated, Opt_sloppy, @@ -171,6 +177,7 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_lookupcache, "lookupcache=%s" }, { Opt_fscache_uniq, "fsc=%s" }, + { Opt_local_lock, "local_lock=%s" }, { Opt_err, NULL } }; @@ -236,14 +243,30 @@ static match_table_t nfs_lookupcache_tokens = { { Opt_lookupcache_err, NULL } }; +enum { + Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix, + Opt_local_lock_none, + + Opt_local_lock_err +}; + +static match_table_t nfs_local_lock_tokens = { + { Opt_local_lock_all, "all" }, + { Opt_local_lock_flock, "flock" }, + { Opt_local_lock_posix, "posix" }, + { Opt_local_lock_none, "none" }, + + { Opt_local_lock_err, NULL } +}; + static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct dentry *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); static int nfs_show_stats(struct seq_file *, struct vfsmount *); static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); -static int nfs_xdev_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); static void nfs_put_super(struct super_block *); static void nfs_kill_super(struct super_block *); static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); @@ -259,7 +282,7 @@ static struct file_system_type nfs_fs_type = { struct file_system_type nfs_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs", - .get_sb = nfs_xdev_get_sb, + .mount = nfs_xdev_mount, .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -284,14 +307,14 @@ static int nfs4_try_mount(int flags, const char *dev_name, struct nfs_parsed_mount_data *data, struct vfsmount *mnt); static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); -static int nfs4_remote_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); -static int nfs4_xdev_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); +static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); -static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); static void nfs4_kill_super(struct super_block *sb); static struct file_system_type nfs4_fs_type = { @@ -305,7 +328,7 @@ static struct file_system_type nfs4_fs_type = { static struct file_system_type nfs4_remote_fs_type = { .owner = THIS_MODULE, .name = "nfs4", - .get_sb = nfs4_remote_get_sb, + .mount = nfs4_remote_mount, .kill_sb = nfs4_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -313,7 +336,7 @@ static struct file_system_type nfs4_remote_fs_type = { struct file_system_type nfs4_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs4", - .get_sb = nfs4_xdev_get_sb, + .mount = nfs4_xdev_mount, .kill_sb = nfs4_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -321,7 +344,7 @@ struct file_system_type nfs4_xdev_fs_type = { static struct file_system_type nfs4_remote_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", - .get_sb = nfs4_remote_referral_get_sb, + .mount = nfs4_remote_referral_mount, .kill_sb = nfs4_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -575,7 +598,9 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, if (nfss->mountd_version || showdefaults) seq_printf(m, ",mountvers=%u", nfss->mountd_version); - if (nfss->mountd_port || showdefaults) + if ((nfss->mountd_port && + nfss->mountd_port != (unsigned short)NFS_UNSPEC_PORT) || + showdefaults) seq_printf(m, ",mountport=%u", nfss->mountd_port); nfs_show_mountd_netid(m, nfss, showdefaults); @@ -622,6 +647,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, const struct proc_nfs_info *nfs_infop; struct nfs_client *clp = nfss->nfs_client; u32 version = clp->rpc_ops->version; + int local_flock, local_fcntl; seq_printf(m, ",vers=%u", version); seq_printf(m, ",rsize=%u", nfss->rsize); @@ -670,6 +696,18 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, else seq_printf(m, ",lookupcache=pos"); } + + local_flock = nfss->flags & NFS_MOUNT_LOCAL_FLOCK; + local_fcntl = nfss->flags & NFS_MOUNT_LOCAL_FCNTL; + + if (!local_flock && !local_fcntl) + seq_printf(m, ",local_lock=none"); + else if (local_flock && local_fcntl) + seq_printf(m, ",local_lock=all"); + else if (local_flock) + seq_printf(m, ",local_lock=flock"); + else + seq_printf(m, ",local_lock=posix"); } /* @@ -1017,9 +1055,13 @@ static int nfs_parse_mount_options(char *raw, break; case Opt_lock: mnt->flags &= ~NFS_MOUNT_NONLM; + mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); break; case Opt_nolock: mnt->flags |= NFS_MOUNT_NONLM; + mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); break; case Opt_v2: mnt->flags &= ~NFS_MOUNT_VER3; @@ -1029,12 +1071,10 @@ static int nfs_parse_mount_options(char *raw, mnt->flags |= NFS_MOUNT_VER3; mnt->version = 3; break; -#ifdef CONFIG_NFS_V4 case Opt_v4: mnt->flags &= ~NFS_MOUNT_VER3; mnt->version = 4; break; -#endif case Opt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; @@ -1246,12 +1286,10 @@ static int nfs_parse_mount_options(char *raw, mnt->flags |= NFS_MOUNT_VER3; mnt->version = 3; break; -#ifdef CONFIG_NFS_V4 case NFS4_VERSION: mnt->flags &= ~NFS_MOUNT_VER3; mnt->version = 4; break; -#endif default: goto out_invalid_value; } @@ -1420,6 +1458,34 @@ static int nfs_parse_mount_options(char *raw, mnt->fscache_uniq = string; mnt->options |= NFS_OPTION_FSCACHE; break; + case Opt_local_lock: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, nfs_local_lock_tokens, + args); + kfree(string); + switch (token) { + case Opt_local_lock_all: + mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + case Opt_local_lock_flock: + mnt->flags |= NFS_MOUNT_LOCAL_FLOCK; + break; + case Opt_local_lock_posix: + mnt->flags |= NFS_MOUNT_LOCAL_FCNTL; + break; + case Opt_local_lock_none: + mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + default: + dfprintk(MOUNT, "NFS: invalid " + "local_lock argument\n"); + return 0; + }; + break; /* * Special options @@ -1825,6 +1891,12 @@ static int nfs_validate_mount_data(void *options, if (!args->nfs_server.hostname) goto out_nomem; + if (!(data->flags & NFS_MOUNT_NONLM)) + args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| + NFS_MOUNT_LOCAL_FCNTL); + else + args->flags |= (NFS_MOUNT_LOCAL_FLOCK| + NFS_MOUNT_LOCAL_FCNTL); /* * The legacy version 6 binary mount data from userspace has a * field used only to transport selinux information into the @@ -2130,6 +2202,7 @@ static int nfs_set_super(struct super_block *s, void *data) s->s_flags = sb_mntdata->mntflags; s->s_fs_info = server; + s->s_d_op = server->nfs_client->rpc_ops->dentry_ops; ret = set_anon_super(s, server); if (ret == 0) server->s_dev = s->s_dev; @@ -2208,7 +2281,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, }; int error = -ENOMEM; - data = nfs_alloc_parsed_mount_data(3); + data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); mntfh = nfs_alloc_fhandle(); if (data == NULL || mntfh == NULL) goto out_free_fh; @@ -2328,9 +2401,9 @@ static void nfs_kill_super(struct super_block *s) /* * Clone an NFS2/3 server record on xdev traversal (FSID-change) */ -static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data, - struct vfsmount *mnt) +static struct dentry * +nfs_xdev_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) { struct nfs_clone_mount *data = raw_data; struct super_block *s; @@ -2342,7 +2415,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, }; int error; - dprintk("--> nfs_xdev_get_sb()\n"); + dprintk("--> nfs_xdev_mount()\n"); /* create a new volume representation */ server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); @@ -2389,28 +2462,26 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, } s->s_flags |= MS_ACTIVE; - mnt->mnt_sb = s; - mnt->mnt_root = mntroot; /* clone any lsm security options from the parent to the new sb */ security_sb_clone_mnt_opts(data->sb, s); - dprintk("<-- nfs_xdev_get_sb() = 0\n"); - return 0; + dprintk("<-- nfs_xdev_mount() = 0\n"); + return mntroot; out_err_nosb: nfs_free_server(server); out_err_noserver: - dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error); - return error; + dprintk("<-- nfs_xdev_mount() = %d [error]\n", error); + return ERR_PTR(error); error_splat_super: if (server && !s->s_root) bdi_unregister(&server->backing_dev_info); error_splat_bdi: deactivate_locked_super(s); - dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); - return error; + dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error); + return ERR_PTR(error); } #ifdef CONFIG_NFS_V4 @@ -2426,7 +2497,13 @@ static void nfs4_clone_super(struct super_block *sb, sb->s_maxbytes = old_sb->s_maxbytes; sb->s_time_gran = 1; sb->s_op = old_sb->s_op; - nfs_initialise_sb(sb); + /* + * The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; + sb->s_xattr = old_sb->s_xattr; + nfs_initialise_sb(sb); } /* @@ -2436,12 +2513,19 @@ static void nfs4_fill_super(struct super_block *sb) { sb->s_time_gran = 1; sb->s_op = &nfs4_sops; + /* + * The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; + sb->s_xattr = nfs4_xattr_handlers; nfs_initialise_sb(sb); } static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) { - args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); + args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3| + NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); } static int nfs4_validate_text_mount_data(void *options, @@ -2579,8 +2663,9 @@ out_no_address: /* * Get the superblock for the NFS4 root partition */ -static int nfs4_remote_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +static struct dentry * +nfs4_remote_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) { struct nfs_parsed_mount_data *data = raw_data; struct super_block *s; @@ -2644,15 +2729,16 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type, goto error_splat_root; s->s_flags |= MS_ACTIVE; - mnt->mnt_sb = s; - mnt->mnt_root = mntroot; - error = 0; + + security_free_mnt_opts(&data->lsm_opts); + nfs_free_fhandle(mntfh); + return mntroot; out: security_free_mnt_opts(&data->lsm_opts); out_free_fh: nfs_free_fhandle(mntfh); - return error; + return ERR_PTR(error); out_free: nfs_free_server(server); @@ -2898,9 +2984,9 @@ static void nfs4_kill_super(struct super_block *sb) /* * Clone an NFS4 server record on xdev traversal (FSID-change) */ -static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data, - struct vfsmount *mnt) +static struct dentry * +nfs4_xdev_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) { struct nfs_clone_mount *data = raw_data; struct super_block *s; @@ -2912,7 +2998,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, }; int error; - dprintk("--> nfs4_xdev_get_sb()\n"); + dprintk("--> nfs4_xdev_mount()\n"); /* create a new volume representation */ server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); @@ -2959,32 +3045,30 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, } s->s_flags |= MS_ACTIVE; - mnt->mnt_sb = s; - mnt->mnt_root = mntroot; security_sb_clone_mnt_opts(data->sb, s); - dprintk("<-- nfs4_xdev_get_sb() = 0\n"); - return 0; + dprintk("<-- nfs4_xdev_mount() = 0\n"); + return mntroot; out_err_nosb: nfs_free_server(server); out_err_noserver: - dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error); - return error; + dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error); + return ERR_PTR(error); error_splat_super: if (server && !s->s_root) bdi_unregister(&server->backing_dev_info); error_splat_bdi: deactivate_locked_super(s); - dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); - return error; + dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error); + return ERR_PTR(error); } -static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, - struct vfsmount *mnt) +static struct dentry * +nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) { struct nfs_clone_mount *data = raw_data; struct super_block *s; @@ -3048,14 +3132,12 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, } s->s_flags |= MS_ACTIVE; - mnt->mnt_sb = s; - mnt->mnt_root = mntroot; security_sb_clone_mnt_opts(data->sb, s); nfs_free_fhandle(mntfh); dprintk("<-- nfs4_referral_get_sb() = 0\n"); - return 0; + return mntroot; out_err_nosb: nfs_free_server(server); @@ -3063,7 +3145,7 @@ out_err_noserver: nfs_free_fhandle(mntfh); out_err_nofh: dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error); - return error; + return ERR_PTR(error); error_splat_super: if (server && !s->s_root) @@ -3072,7 +3154,7 @@ error_splat_bdi: deactivate_locked_super(s); nfs_free_fhandle(mntfh); dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); - return error; + return ERR_PTR(error); } /* diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index ad4d2e787b2..978aaeb8a09 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -32,6 +32,7 @@ static ctl_table nfs_cb_sysctls[] = { .extra1 = (int *)&nfs_set_port_min, .extra2 = (int *)&nfs_set_port_max, }, +#ifndef CONFIG_NFS_USE_NEW_IDMAPPER { .procname = "idmap_cache_timeout", .data = &nfs_idmap_cache_timeout, @@ -39,6 +40,7 @@ static ctl_table nfs_cb_sysctls[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, +#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ #endif { .procname = "nfs_mountpoint_timeout", diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 2f84adaad42..e313a51acdd 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -13,9 +13,12 @@ #include <linux/nfs_fs.h> #include <linux/sched.h> #include <linux/wait.h> +#include <linux/namei.h> #include "internal.h" #include "nfs4_fs.h" +#include "iostat.h" +#include "delegation.h" struct nfs_unlinkdata { struct hlist_node list; @@ -244,7 +247,7 @@ void nfs_unblock_sillyrename(struct dentry *dentry) * @dir: parent directory of dentry * @dentry: dentry to unlink */ -int +static int nfs_async_unlink(struct inode *dir, struct dentry *dentry) { struct nfs_unlinkdata *data; @@ -259,7 +262,6 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) status = PTR_ERR(data->cred); goto out_free; } - data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; data->res.dir_attr = &data->dir_attr; status = -EBUSY; @@ -303,3 +305,256 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode) if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))) nfs_free_unlinkdata(data); } + +/* Cancel a queued async unlink. Called when a sillyrename run fails. */ +static void +nfs_cancel_async_unlink(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { + struct nfs_unlinkdata *data = dentry->d_fsdata; + + dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; + spin_unlock(&dentry->d_lock); + nfs_free_unlinkdata(data); + return; + } + spin_unlock(&dentry->d_lock); +} + +struct nfs_renamedata { + struct nfs_renameargs args; + struct nfs_renameres res; + struct rpc_cred *cred; + struct inode *old_dir; + struct dentry *old_dentry; + struct nfs_fattr old_fattr; + struct inode *new_dir; + struct dentry *new_dentry; + struct nfs_fattr new_fattr; +}; + +/** + * nfs_async_rename_done - Sillyrename post-processing + * @task: rpc_task of the sillyrename + * @calldata: nfs_renamedata for the sillyrename + * + * Do the directory attribute updates and the d_move + */ +static void nfs_async_rename_done(struct rpc_task *task, void *calldata) +{ + struct nfs_renamedata *data = calldata; + struct inode *old_dir = data->old_dir; + struct inode *new_dir = data->new_dir; + + if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { + nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); + return; + } + + if (task->tk_status != 0) { + nfs_cancel_async_unlink(data->old_dentry); + return; + } + + nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir)); + d_move(data->old_dentry, data->new_dentry); +} + +/** + * nfs_async_rename_release - Release the sillyrename data. + * @calldata: the struct nfs_renamedata to be released + */ +static void nfs_async_rename_release(void *calldata) +{ + struct nfs_renamedata *data = calldata; + struct super_block *sb = data->old_dir->i_sb; + + if (data->old_dentry->d_inode) + nfs_mark_for_revalidate(data->old_dentry->d_inode); + + dput(data->old_dentry); + dput(data->new_dentry); + iput(data->old_dir); + iput(data->new_dir); + nfs_sb_deactive(sb); + put_rpccred(data->cred); + kfree(data); +} + +#if defined(CONFIG_NFS_V4_1) +static void nfs_rename_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_renamedata *data = calldata; + struct nfs_server *server = NFS_SERVER(data->old_dir); + + if (nfs4_setup_sequence(server, &data->args.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + +static const struct rpc_call_ops nfs_rename_ops = { + .rpc_call_done = nfs_async_rename_done, + .rpc_release = nfs_async_rename_release, +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_rename_prepare, +#endif /* CONFIG_NFS_V4_1 */ +}; + +/** + * nfs_async_rename - perform an asynchronous rename operation + * @old_dir: directory that currently holds the dentry to be renamed + * @new_dir: target directory for the rename + * @old_dentry: original dentry to be renamed + * @new_dentry: dentry to which the old_dentry should be renamed + * + * It's expected that valid references to the dentries and inodes are held + */ +static struct rpc_task * +nfs_async_rename(struct inode *old_dir, struct inode *new_dir, + struct dentry *old_dentry, struct dentry *new_dentry) +{ + struct nfs_renamedata *data; + struct rpc_message msg = { }; + struct rpc_task_setup task_setup_data = { + .rpc_message = &msg, + .callback_ops = &nfs_rename_ops, + .workqueue = nfsiod_workqueue, + .rpc_client = NFS_CLIENT(old_dir), + .flags = RPC_TASK_ASYNC, + }; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return ERR_PTR(-ENOMEM); + task_setup_data.callback_data = data; + + data->cred = rpc_lookup_cred(); + if (IS_ERR(data->cred)) { + struct rpc_task *task = ERR_CAST(data->cred); + kfree(data); + return task; + } + + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + msg.rpc_cred = data->cred; + + /* set up nfs_renamedata */ + data->old_dir = old_dir; + ihold(old_dir); + data->new_dir = new_dir; + ihold(new_dir); + data->old_dentry = dget(old_dentry); + data->new_dentry = dget(new_dentry); + nfs_fattr_init(&data->old_fattr); + nfs_fattr_init(&data->new_fattr); + + /* set up nfs_renameargs */ + data->args.old_dir = NFS_FH(old_dir); + data->args.old_name = &old_dentry->d_name; + data->args.new_dir = NFS_FH(new_dir); + data->args.new_name = &new_dentry->d_name; + + /* set up nfs_renameres */ + data->res.old_fattr = &data->old_fattr; + data->res.new_fattr = &data->new_fattr; + + nfs_sb_active(old_dir->i_sb); + + NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dir); + + return rpc_run_task(&task_setup_data); +} + +/** + * nfs_sillyrename - Perform a silly-rename of a dentry + * @dir: inode of directory that contains dentry + * @dentry: dentry to be sillyrenamed + * + * NFSv2/3 is stateless and the server doesn't know when the client is + * holding a file open. To prevent application problems when a file is + * unlinked while it's still open, the client performs a "silly-rename". + * That is, it renames the file to a hidden file in the same directory, + * and only performs the unlink once the last reference to it is put. + * + * The final cleanup is done during dentry_iput. + */ +int +nfs_sillyrename(struct inode *dir, struct dentry *dentry) +{ + static unsigned int sillycounter; + const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; + const int countersize = sizeof(sillycounter)*2; + const int slen = sizeof(".nfs")+fileidsize+countersize-1; + char silly[slen+1]; + struct dentry *sdentry; + struct rpc_task *task; + int error = -EIO; + + dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + dentry->d_count); + nfs_inc_stats(dir, NFSIOS_SILLYRENAME); + + /* + * We don't allow a dentry to be silly-renamed twice. + */ + error = -EBUSY; + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + goto out; + + sprintf(silly, ".nfs%*.*Lx", + fileidsize, fileidsize, + (unsigned long long)NFS_FILEID(dentry->d_inode)); + + /* Return delegation in anticipation of the rename */ + nfs_inode_return_delegation(dentry->d_inode); + + sdentry = NULL; + do { + char *suffix = silly + slen - countersize; + + dput(sdentry); + sillycounter++; + sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); + + dfprintk(VFS, "NFS: trying to rename %s to %s\n", + dentry->d_name.name, silly); + + sdentry = lookup_one_len(silly, dentry->d_parent, slen); + /* + * N.B. Better to return EBUSY here ... it could be + * dangerous to delete the file while it's in use. + */ + if (IS_ERR(sdentry)) + goto out; + } while (sdentry->d_inode != NULL); /* need negative lookup */ + + /* queue unlink first. Can't do this from rpc_release as it + * has to allocate memory + */ + error = nfs_async_unlink(dir, dentry); + if (error) + goto out_dput; + + /* run the rename task, undo unlink if it fails */ + task = nfs_async_rename(dir, dir, dentry, sdentry); + if (IS_ERR(task)) { + error = -EBUSY; + nfs_cancel_async_unlink(dentry); + goto out_dput; + } + + /* wait for the RPC task to complete, unless a SIGKILL intervenes */ + error = rpc_wait_for_completion_task(task); + if (error == 0) + error = task->tk_status; + rpc_put_task(task); +out_dput: + dput(sdentry); +out: + return error; +} diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 874972d9427..c8278f4046c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -55,7 +55,6 @@ struct nfs_write_data *nfs_commitdata_alloc(void) if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); - p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; } return p; } @@ -75,7 +74,6 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); p->npages = pagecount; - p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { @@ -292,9 +290,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); nfs_pageio_cond_complete(pgio, page->index); - ret = nfs_page_async_flush(pgio, page, - wbc->sync_mode == WB_SYNC_NONE || - wbc->nonblocking != 0); + ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); if (ret == -EAGAIN) { redirty_page_for_writepage(wbc, page); ret = 0; @@ -394,6 +390,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } + set_bit(PG_MAPPED, &req->wb_flags); SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); nfsi->npages++; @@ -419,6 +416,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) spin_lock(&inode->i_lock); set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); + clear_bit(PG_MAPPED, &req->wb_flags); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; if (!nfsi->npages) { @@ -426,7 +424,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) iput(inode); } else spin_unlock(&inode->i_lock); - nfs_clear_request(req); nfs_release_request(req); } @@ -935,7 +932,7 @@ out_bad: while (!list_empty(&list)) { data = list_entry(list.next, struct nfs_write_data, pages); list_del(&data->pages); - nfs_writedata_release(data); + nfs_writedata_free(data); } nfs_redirty_request(req); return -ENOMEM; @@ -1433,15 +1430,17 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr int flags = FLUSH_SYNC; int ret = 0; - /* Don't commit yet if this is a non-blocking flush and there are - * lots of outstanding writes for this mapping. - */ - if (wbc->sync_mode == WB_SYNC_NONE && - nfsi->ncommit <= (nfsi->npages >> 1)) - goto out_mark_dirty; + if (wbc->sync_mode == WB_SYNC_NONE) { + /* Don't commit yet if this is a non-blocking flush and there + * are a lot of outstanding writes for this mapping. + */ + if (nfsi->ncommit <= (nfsi->npages >> 1)) + goto out_mark_dirty; - if (wbc->nonblocking || wbc->for_background) + /* don't wait for the COMMIT response */ flags = 0; + } + ret = nfs_commit_inode(inode, flags); if (ret >= 0) { if (wbc->sync_mode == WB_SYNC_NONE) { |