diff options
Diffstat (limited to 'net/sunrpc')
30 files changed, 2471 insertions, 1102 deletions
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index db73fd2a3f0..9d2fca5ad14 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o auth_generic.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ - rpcb_clnt.o timer.o xdr.o \ + addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c new file mode 100644 index 00000000000..c7450c8f0a7 --- /dev/null +++ b/net/sunrpc/addr.c @@ -0,0 +1,362 @@ +/* + * Copyright 2009, Oracle. All rights reserved. + * + * Convert socket addresses to presentation addresses and universal + * addresses, and vice versa. + * + * Universal addresses are introduced by RFC 1833 and further refined by + * recent RFCs describing NFSv4. The universal address format is part + * of the external (network) interface provided by rpcbind version 3 + * and 4, and by NFSv4. Such an address is a string containing a + * presentation format IP address followed by a port number in + * "hibyte.lobyte" format. + * + * IPv6 addresses can also include a scope ID, typically denoted by + * a '%' followed by a device name or a non-negative integer. Refer to + * RFC 4291, Section 2.2 for details on IPv6 presentation formats. + */ + +#include <net/ipv6.h> +#include <linux/sunrpc/clnt.h> + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + +static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, + char *buf, const int buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + const struct in6_addr *addr = &sin6->sin6_addr; + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded ANY address + */ + if (ipv6_addr_any(addr)) + return snprintf(buf, buflen, "::"); + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded loopback address + */ + if (ipv6_addr_loopback(addr)) + return snprintf(buf, buflen, "::1"); + + /* + * RFC 4291, Section 2.2.3 + * + * Special presentation address format for mapped v4 + * addresses. + */ + if (ipv6_addr_v4mapped(addr)) + return snprintf(buf, buflen, "::ffff:%pI4", + &addr->s6_addr32[3]); + + /* + * RFC 4291, Section 2.2.1 + * + * To keep the result as short as possible, especially + * since we don't shorthand, we don't want leading zeros + * in each halfword, so avoid %pI6. + */ + return snprintf(buf, buflen, "%x:%x:%x:%x:%x:%x:%x:%x", + ntohs(addr->s6_addr16[0]), ntohs(addr->s6_addr16[1]), + ntohs(addr->s6_addr16[2]), ntohs(addr->s6_addr16[3]), + ntohs(addr->s6_addr16[4]), ntohs(addr->s6_addr16[5]), + ntohs(addr->s6_addr16[6]), ntohs(addr->s6_addr16[7])); +} + +static size_t rpc_ntop6(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + char scopebuf[IPV6_SCOPE_ID_LEN]; + size_t len; + int rc; + + len = rpc_ntop6_noscopeid(sap, buf, buflen); + if (unlikely(len == 0)) + return len; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + return len; + + rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u", + IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id); + if (unlikely((size_t)rc > sizeof(scopebuf))) + return 0; + + len += rc; + if (unlikely(len > buflen)) + return 0; + + strcat(buf, scopebuf); + return len; +} + +#else /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ + +static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, + char *buf, const int buflen) +{ + return 0; +} + +static size_t rpc_ntop6(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + return 0; +} + +#endif /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ + +static int rpc_ntop4(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + + return snprintf(buf, buflen, "%pI4", &sin->sin_addr); +} + +/** + * rpc_ntop - construct a presentation address in @buf + * @sap: socket address + * @buf: construction area + * @buflen: size of @buf, in bytes + * + * Plants a %NUL-terminated string in @buf and returns the length + * of the string, excluding the %NUL. Otherwise zero is returned. + */ +size_t rpc_ntop(const struct sockaddr *sap, char *buf, const size_t buflen) +{ + switch (sap->sa_family) { + case AF_INET: + return rpc_ntop4(sap, buf, buflen); + case AF_INET6: + return rpc_ntop6(sap, buf, buflen); + } + + return 0; +} +EXPORT_SYMBOL_GPL(rpc_ntop); + +static size_t rpc_pton4(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + u8 *addr = (u8 *)&sin->sin_addr.s_addr; + + if (buflen > INET_ADDRSTRLEN || salen < sizeof(struct sockaddr_in)) + return 0; + + memset(sap, 0, sizeof(struct sockaddr_in)); + + if (in4_pton(buf, buflen, addr, '\0', NULL) == 0) + return 0; + + sin->sin_family = AF_INET; + return sizeof(struct sockaddr_in);; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int rpc_parse_scope_id(const char *buf, const size_t buflen, + const char *delim, struct sockaddr_in6 *sin6) +{ + char *p; + size_t len; + + if ((buf + buflen) == delim) + return 1; + + if (*delim != IPV6_SCOPE_DELIMITER) + return 0; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + return 0; + + len = (buf + buflen) - delim - 1; + p = kstrndup(delim + 1, len, GFP_KERNEL); + if (p) { + unsigned long scope_id = 0; + struct net_device *dev; + + dev = dev_get_by_name(&init_net, p); + if (dev != NULL) { + scope_id = dev->ifindex; + dev_put(dev); + } else { + if (strict_strtoul(p, 10, &scope_id) == 0) { + kfree(p); + return 0; + } + } + + kfree(p); + + sin6->sin6_scope_id = scope_id; + return 1; + } + + return 0; +} + +static size_t rpc_pton6(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; + const char *delim; + + if (buflen > (INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN) || + salen < sizeof(struct sockaddr_in6)) + return 0; + + memset(sap, 0, sizeof(struct sockaddr_in6)); + + if (in6_pton(buf, buflen, addr, IPV6_SCOPE_DELIMITER, &delim) == 0) + return 0; + + if (!rpc_parse_scope_id(buf, buflen, delim, sin6)) + return 0; + + sin6->sin6_family = AF_INET6; + return sizeof(struct sockaddr_in6); +} +#else +static size_t rpc_pton6(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + return 0; +} +#endif + +/** + * rpc_pton - Construct a sockaddr in @sap + * @buf: C string containing presentation format IP address + * @buflen: length of presentation address in bytes + * @sap: buffer into which to plant socket address + * @salen: size of buffer in bytes + * + * Returns the size of the socket address if successful; otherwise + * zero is returned. + * + * Plants a socket address in @sap and returns the size of the + * socket address, if successful. Returns zero if an error + * occurred. + */ +size_t rpc_pton(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + unsigned int i; + + for (i = 0; i < buflen; i++) + if (buf[i] == ':') + return rpc_pton6(buf, buflen, sap, salen); + return rpc_pton4(buf, buflen, sap, salen); +} +EXPORT_SYMBOL_GPL(rpc_pton); + +/** + * rpc_sockaddr2uaddr - Construct a universal address string from @sap. + * @sap: socket address + * + * Returns a %NUL-terminated string in dynamically allocated memory; + * otherwise NULL is returned if an error occurred. Caller must + * free the returned string. + */ +char *rpc_sockaddr2uaddr(const struct sockaddr *sap) +{ + char portbuf[RPCBIND_MAXUADDRPLEN]; + char addrbuf[RPCBIND_MAXUADDRLEN]; + unsigned short port; + + switch (sap->sa_family) { + case AF_INET: + if (rpc_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) + return NULL; + port = ntohs(((struct sockaddr_in *)sap)->sin_port); + break; + case AF_INET6: + if (rpc_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) + return NULL; + port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + break; + default: + return NULL; + } + + if (snprintf(portbuf, sizeof(portbuf), + ".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf)) + return NULL; + + if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf)) + return NULL; + + return kstrdup(addrbuf, GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(rpc_sockaddr2uaddr); + +/** + * rpc_uaddr2sockaddr - convert a universal address to a socket address. + * @uaddr: C string containing universal address to convert + * @uaddr_len: length of universal address string + * @sap: buffer into which to plant socket address + * @salen: size of buffer + * + * @uaddr does not have to be '\0'-terminated, but strict_strtoul() and + * rpc_pton() require proper string termination to be successful. + * + * Returns the size of the socket address if successful; otherwise + * zero is returned. + */ +size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, + struct sockaddr *sap, const size_t salen) +{ + char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')]; + unsigned long portlo, porthi; + unsigned short port; + + if (uaddr_len > RPCBIND_MAXUADDRLEN) + return 0; + + memcpy(buf, uaddr, uaddr_len); + + buf[uaddr_len] = '\0'; + c = strrchr(buf, '.'); + if (unlikely(c == NULL)) + return 0; + if (unlikely(strict_strtoul(c + 1, 10, &portlo) != 0)) + return 0; + if (unlikely(portlo > 255)) + return 0; + + *c = '\0'; + c = strrchr(buf, '.'); + if (unlikely(c == NULL)) + return 0; + if (unlikely(strict_strtoul(c + 1, 10, &porthi) != 0)) + return 0; + if (unlikely(porthi > 255)) + return 0; + + port = (unsigned short)((porthi << 8) | portlo); + + *c = '\0'; + if (rpc_pton(buf, strlen(buf), sap, salen) == 0) + return 0; + + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + return sizeof(struct sockaddr_in); + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + return sizeof(struct sockaddr_in6); + } + + return 0; +} +EXPORT_SYMBOL_GPL(rpc_uaddr2sockaddr); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 0c431c277af..7535a7bed2f 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -332,9 +332,9 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, list_add_tail(&new->cr_lru, &free); spin_unlock(&cache->lock); found: - if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) - && cred->cr_ops->cr_init != NULL - && !(flags & RPCAUTH_LOOKUP_NEW)) { + if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && + cred->cr_ops->cr_init != NULL && + !(flags & RPCAUTH_LOOKUP_NEW)) { int res = cred->cr_ops->cr_init(auth, cred); if (res < 0) { put_rpccred(cred); @@ -385,7 +385,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, EXPORT_SYMBOL_GPL(rpcauth_init_cred); void -rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) +rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) { task->tk_msg.rpc_cred = get_rpccred(cred); dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, @@ -394,7 +394,7 @@ rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); static void -rpcauth_bind_root_cred(struct rpc_task *task) +rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred acred = { @@ -405,7 +405,7 @@ rpcauth_bind_root_cred(struct rpc_task *task) dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); - ret = auth->au_ops->lookup_cred(auth, &acred, 0); + ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); if (!IS_ERR(ret)) task->tk_msg.rpc_cred = ret; else @@ -413,14 +413,14 @@ rpcauth_bind_root_cred(struct rpc_task *task) } static void -rpcauth_bind_new_cred(struct rpc_task *task) +rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; struct rpc_cred *ret; dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, auth->au_ops->au_name); - ret = rpcauth_lookupcred(auth, 0); + ret = rpcauth_lookupcred(auth, lookupflags); if (!IS_ERR(ret)) task->tk_msg.rpc_cred = ret; else @@ -430,12 +430,16 @@ rpcauth_bind_new_cred(struct rpc_task *task) void rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) { + int lookupflags = 0; + + if (flags & RPC_TASK_ASYNC) + lookupflags |= RPCAUTH_LOOKUP_NEW; if (cred != NULL) - cred->cr_ops->crbind(task, cred); + cred->cr_ops->crbind(task, cred, lookupflags); else if (flags & RPC_TASK_ROOTCREDS) - rpcauth_bind_root_cred(task); + rpcauth_bind_root_cred(task, lookupflags); else - rpcauth_bind_new_cred(task); + rpcauth_bind_new_cred(task, lookupflags); } void diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 4028502f052..bf88bf8e936 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -55,13 +55,13 @@ struct rpc_cred *rpc_lookup_machine_cred(void) EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); static void -generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) +generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; struct rpc_cred *ret; - ret = auth->au_ops->lookup_cred(auth, acred, 0); + ret = auth->au_ops->lookup_cred(auth, acred, lookupflags); if (!IS_ERR(ret)) task->tk_msg.rpc_cred = ret; else diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 66d458fc692..fc6a43ccd95 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -89,8 +89,8 @@ static struct rpc_wait_queue pipe_version_rpc_waitqueue; static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue); static void gss_free_ctx(struct gss_cl_ctx *); -static struct rpc_pipe_ops gss_upcall_ops_v0; -static struct rpc_pipe_ops gss_upcall_ops_v1; +static const struct rpc_pipe_ops gss_upcall_ops_v0; +static const struct rpc_pipe_ops gss_upcall_ops_v1; static inline struct gss_cl_ctx * gss_get_ctx(struct gss_cl_ctx *ctx) @@ -777,7 +777,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) * that we supported only the old pipe. So we instead create * the new pipe first. */ - gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_dentry, + gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_path.dentry, "gssd", clnt, &gss_upcall_ops_v1, RPC_PIPE_WAIT_FOR_OPEN); @@ -786,7 +786,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) goto err_put_mech; } - gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_dentry, + gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_path.dentry, gss_auth->mech->gm_name, clnt, &gss_upcall_ops_v0, RPC_PIPE_WAIT_FOR_OPEN); @@ -1507,7 +1507,7 @@ static const struct rpc_credops gss_nullops = { .crunwrap_resp = gss_unwrap_resp, }; -static struct rpc_pipe_ops gss_upcall_ops_v0 = { +static const struct rpc_pipe_ops gss_upcall_ops_v0 = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, @@ -1515,7 +1515,7 @@ static struct rpc_pipe_ops gss_upcall_ops_v0 = { .release_pipe = gss_pipe_release, }; -static struct rpc_pipe_ops gss_upcall_ops_v1 = { +static const struct rpc_pipe_ops gss_upcall_ops_v1 = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index f160be6c1a4..17562b4c35f 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -75,8 +75,8 @@ krb5_get_seq_num(struct crypto_blkcipher *key, if ((code = krb5_decrypt(key, cksum, buf, plain, 8))) return code; - if ((plain[4] != plain[5]) || (plain[4] != plain[6]) - || (plain[4] != plain[7])) + if ((plain[4] != plain[5]) || (plain[4] != plain[6]) || + (plain[4] != plain[7])) return (s32)KG_BAD_SEQ; *direction = plain[4]; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 2278a50c644..e34bc531fcb 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -105,8 +105,8 @@ static int rsi_match(struct cache_head *a, struct cache_head *b) { struct rsi *item = container_of(a, struct rsi, h); struct rsi *tmp = container_of(b, struct rsi, h); - return netobj_equal(&item->in_handle, &tmp->in_handle) - && netobj_equal(&item->in_token, &tmp->in_token); + return netobj_equal(&item->in_handle, &tmp->in_handle) && + netobj_equal(&item->in_token, &tmp->in_token); } static int dup_to_netobj(struct xdr_netobj *dst, char *src, int len) @@ -181,6 +181,11 @@ static void rsi_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, rsi_request); +} + static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -270,7 +275,7 @@ static struct cache_detail rsi_cache = { .hash_table = rsi_table, .name = "auth.rpcsec.init", .cache_put = rsi_put, - .cache_request = rsi_request, + .cache_upcall = rsi_upcall, .cache_parse = rsi_parse, .match = rsi_match, .init = rsi_init, @@ -1369,8 +1374,10 @@ svcauth_gss_release(struct svc_rqst *rqstp) if (stat) goto out_err; break; - default: - goto out_err; + /* + * For any other gc_svc value, svcauth_gss_accept() already set + * the auth_error appropriately; just fall through: + */ } out: diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index c70dd7f5258..1db618f56ec 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -8,7 +8,6 @@ #include <linux/types.h> #include <linux/module.h> -#include <linux/utsname.h> #include <linux/sunrpc/clnt.h> #ifdef RPC_DEBUG diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index ff0c23053d2..39bddba53ba 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -27,10 +27,12 @@ #include <linux/net.h> #include <linux/workqueue.h> #include <linux/mutex.h> +#include <linux/pagemap.h> #include <asm/ioctls.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/stats.h> +#include <linux/sunrpc/rpc_pipe_fs.h> #define RPCDBG_FACILITY RPCDBG_CACHE @@ -101,23 +103,21 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); -static void queue_loose(struct cache_detail *detail, struct cache_head *ch); +static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); -static int cache_fresh_locked(struct cache_head *head, time_t expiry) +static void cache_fresh_locked(struct cache_head *head, time_t expiry) { head->expiry_time = expiry; head->last_refresh = get_seconds(); - return !test_and_set_bit(CACHE_VALID, &head->flags); + set_bit(CACHE_VALID, &head->flags); } static void cache_fresh_unlocked(struct cache_head *head, - struct cache_detail *detail, int new) + struct cache_detail *detail) { - if (new) - cache_revisit_request(head); if (test_and_clear_bit(CACHE_PENDING, &head->flags)) { cache_revisit_request(head); - queue_loose(detail, head); + cache_dequeue(detail, head); } } @@ -130,7 +130,6 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, */ struct cache_head **head; struct cache_head *tmp; - int is_new; if (!test_bit(CACHE_VALID, &old->flags)) { write_lock(&detail->hash_lock); @@ -139,9 +138,9 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, set_bit(CACHE_NEGATIVE, &old->flags); else detail->update(old, new); - is_new = cache_fresh_locked(old, new->expiry_time); + cache_fresh_locked(old, new->expiry_time); write_unlock(&detail->hash_lock); - cache_fresh_unlocked(old, detail, is_new); + cache_fresh_unlocked(old, detail); return old; } write_unlock(&detail->hash_lock); @@ -165,17 +164,39 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, *head = tmp; detail->entries++; cache_get(tmp); - is_new = cache_fresh_locked(tmp, new->expiry_time); + cache_fresh_locked(tmp, new->expiry_time); cache_fresh_locked(old, 0); write_unlock(&detail->hash_lock); - cache_fresh_unlocked(tmp, detail, is_new); - cache_fresh_unlocked(old, detail, 0); + cache_fresh_unlocked(tmp, detail); + cache_fresh_unlocked(old, detail); cache_put(old, detail); return tmp; } EXPORT_SYMBOL_GPL(sunrpc_cache_update); -static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); +static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) +{ + if (!cd->cache_upcall) + return -EINVAL; + return cd->cache_upcall(cd, h); +} + +static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) +{ + if (!test_bit(CACHE_VALID, &h->flags) || + h->expiry_time < get_seconds()) + return -EAGAIN; + else if (detail->flush_time > h->last_refresh) + return -EAGAIN; + else { + /* entry is valid */ + if (test_bit(CACHE_NEGATIVE, &h->flags)) + return -ENOENT; + else + return 0; + } +} + /* * This is the generic cache management routine for all * the authentication caches. @@ -184,8 +205,10 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); * * * Returns 0 if the cache_head can be used, or cache_puts it and returns - * -EAGAIN if upcall is pending, - * -ETIMEDOUT if upcall failed and should be retried, + * -EAGAIN if upcall is pending and request has been queued + * -ETIMEDOUT if upcall failed or request could not be queue or + * upcall completed but item is still invalid (implying that + * the cache item has been replaced with a newer one). * -ENOENT if cache entry was negative */ int cache_check(struct cache_detail *detail, @@ -195,17 +218,7 @@ int cache_check(struct cache_detail *detail, long refresh_age, age; /* First decide return status as best we can */ - if (!test_bit(CACHE_VALID, &h->flags) || - h->expiry_time < get_seconds()) - rv = -EAGAIN; - else if (detail->flush_time > h->last_refresh) - rv = -EAGAIN; - else { - /* entry is valid */ - if (test_bit(CACHE_NEGATIVE, &h->flags)) - rv = -ENOENT; - else rv = 0; - } + rv = cache_is_valid(detail, h); /* now see if we want to start an upcall */ refresh_age = (h->expiry_time - h->last_refresh); @@ -221,10 +234,11 @@ int cache_check(struct cache_detail *detail, switch (cache_make_upcall(detail, h)) { case -EINVAL: clear_bit(CACHE_PENDING, &h->flags); + cache_revisit_request(h); if (rv == -EAGAIN) { set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_unlocked(h, detail, - cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY)); + cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY); + cache_fresh_unlocked(h, detail); rv = -ENOENT; } break; @@ -237,10 +251,14 @@ int cache_check(struct cache_detail *detail, } } - if (rv == -EAGAIN) - if (cache_defer_req(rqstp, h) != 0) - rv = -ETIMEDOUT; - + if (rv == -EAGAIN) { + if (cache_defer_req(rqstp, h) < 0) { + /* Request is not deferred */ + rv = cache_is_valid(detail, h); + if (rv == -EAGAIN) + rv = -ETIMEDOUT; + } + } if (rv) cache_put(h, detail); return rv; @@ -284,76 +302,11 @@ static DEFINE_SPINLOCK(cache_list_lock); static struct cache_detail *current_detail; static int current_index; -static const struct file_operations cache_file_operations; -static const struct file_operations content_file_operations; -static const struct file_operations cache_flush_operations; - static void do_cache_clean(struct work_struct *work); static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); -static void remove_cache_proc_entries(struct cache_detail *cd) +static void sunrpc_init_cache_detail(struct cache_detail *cd) { - if (cd->proc_ent == NULL) - return; - if (cd->flush_ent) - remove_proc_entry("flush", cd->proc_ent); - if (cd->channel_ent) - remove_proc_entry("channel", cd->proc_ent); - if (cd->content_ent) - remove_proc_entry("content", cd->proc_ent); - cd->proc_ent = NULL; - remove_proc_entry(cd->name, proc_net_rpc); -} - -#ifdef CONFIG_PROC_FS -static int create_cache_proc_entries(struct cache_detail *cd) -{ - struct proc_dir_entry *p; - - cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); - if (cd->proc_ent == NULL) - goto out_nomem; - cd->channel_ent = cd->content_ent = NULL; - - p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &cache_flush_operations, cd); - cd->flush_ent = p; - if (p == NULL) - goto out_nomem; - - if (cd->cache_request || cd->cache_parse) { - p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &cache_file_operations, cd); - cd->channel_ent = p; - if (p == NULL) - goto out_nomem; - } - if (cd->cache_show) { - p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &content_file_operations, cd); - cd->content_ent = p; - if (p == NULL) - goto out_nomem; - } - return 0; -out_nomem: - remove_cache_proc_entries(cd); - return -ENOMEM; -} -#else /* CONFIG_PROC_FS */ -static int create_cache_proc_entries(struct cache_detail *cd) -{ - return 0; -} -#endif - -int cache_register(struct cache_detail *cd) -{ - int ret; - - ret = create_cache_proc_entries(cd); - if (ret) - return ret; rwlock_init(&cd->hash_lock); INIT_LIST_HEAD(&cd->queue); spin_lock(&cache_list_lock); @@ -367,11 +320,9 @@ int cache_register(struct cache_detail *cd) /* start the cleaning process */ schedule_delayed_work(&cache_cleaner, 0); - return 0; } -EXPORT_SYMBOL_GPL(cache_register); -void cache_unregister(struct cache_detail *cd) +static void sunrpc_destroy_cache_detail(struct cache_detail *cd) { cache_purge(cd); spin_lock(&cache_list_lock); @@ -386,7 +337,6 @@ void cache_unregister(struct cache_detail *cd) list_del_init(&cd->others); write_unlock(&cd->hash_lock); spin_unlock(&cache_list_lock); - remove_cache_proc_entries(cd); if (list_empty(&cache_list)) { /* module must be being unloaded so its safe to kill the worker */ cancel_delayed_work_sync(&cache_cleaner); @@ -395,7 +345,6 @@ void cache_unregister(struct cache_detail *cd) out: printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); } -EXPORT_SYMBOL_GPL(cache_unregister); /* clean cache tries to find something to clean * and cleans it. @@ -452,12 +401,11 @@ static int cache_clean(void) for (; ch; cp= & ch->next, ch= *cp) { if (current_detail->nextcheck > ch->expiry_time) current_detail->nextcheck = ch->expiry_time+1; - if (ch->expiry_time >= get_seconds() - && ch->last_refresh >= current_detail->flush_time - ) + if (ch->expiry_time >= get_seconds() && + ch->last_refresh >= current_detail->flush_time) continue; if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) - queue_loose(current_detail, ch); + cache_dequeue(current_detail, ch); if (atomic_read(&ch->ref.refcount) == 1) break; @@ -473,8 +421,10 @@ static int cache_clean(void) if (!ch) current_index ++; spin_unlock(&cache_list_lock); - if (ch) + if (ch) { + cache_revisit_request(ch); cache_put(ch, d); + } } else spin_unlock(&cache_list_lock); @@ -549,7 +499,7 @@ static int cache_defer_cnt; static int cache_defer_req(struct cache_req *req, struct cache_head *item) { - struct cache_deferred_req *dreq; + struct cache_deferred_req *dreq, *discard; int hash = DFR_HASH(item); if (cache_defer_cnt >= DFR_MAX) { @@ -557,11 +507,11 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) * or continue and drop the oldest below */ if (net_random()&1) - return -ETIMEDOUT; + return -ENOMEM; } dreq = req->defer(req); if (dreq == NULL) - return -ETIMEDOUT; + return -ENOMEM; dreq->item = item; @@ -574,23 +524,24 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) list_add(&dreq->hash, &cache_defer_hash[hash]); /* it is in, now maybe clean up */ - dreq = NULL; + discard = NULL; if (++cache_defer_cnt > DFR_MAX) { - dreq = list_entry(cache_defer_list.prev, - struct cache_deferred_req, recent); - list_del(&dreq->recent); - list_del(&dreq->hash); + discard = list_entry(cache_defer_list.prev, + struct cache_deferred_req, recent); + list_del_init(&discard->recent); + list_del_init(&discard->hash); cache_defer_cnt--; } spin_unlock(&cache_defer_lock); - if (dreq) { + if (discard) /* there was one too many */ - dreq->revisit(dreq, 1); - } + discard->revisit(discard, 1); + if (!test_bit(CACHE_PENDING, &item->flags)) { /* must have just been validated... */ cache_revisit_request(item); + return -EAGAIN; } return 0; } @@ -612,7 +563,7 @@ static void cache_revisit_request(struct cache_head *item) dreq = list_entry(lp, struct cache_deferred_req, hash); lp = lp->next; if (dreq->item == item) { - list_del(&dreq->hash); + list_del_init(&dreq->hash); list_move(&dreq->recent, &pending); cache_defer_cnt--; } @@ -638,7 +589,7 @@ void cache_clean_deferred(void *owner) list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { if (dreq->owner == owner) { - list_del(&dreq->hash); + list_del_init(&dreq->hash); list_move(&dreq->recent, &pending); cache_defer_cnt--; } @@ -687,18 +638,18 @@ struct cache_reader { int offset; /* if non-0, we have a refcnt on next request */ }; -static ssize_t -cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) +static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, + loff_t *ppos, struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; struct cache_request *rq; - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct inode *inode = filp->f_path.dentry->d_inode; int err; if (count == 0) return 0; - mutex_lock(&queue_io_mutex); /* protect against multiple concurrent + mutex_lock(&inode->i_mutex); /* protect against multiple concurrent * readers on this file */ again: spin_lock(&queue_lock); @@ -711,7 +662,7 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) } if (rp->q.list.next == &cd->queue) { spin_unlock(&queue_lock); - mutex_unlock(&queue_io_mutex); + mutex_unlock(&inode->i_mutex); BUG_ON(rp->offset); return 0; } @@ -758,49 +709,90 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) } if (err == -EAGAIN) goto again; - mutex_unlock(&queue_io_mutex); + mutex_unlock(&inode->i_mutex); return err ? err : count; } -static char write_buf[8192]; /* protected by queue_io_mutex */ +static ssize_t cache_do_downcall(char *kaddr, const char __user *buf, + size_t count, struct cache_detail *cd) +{ + ssize_t ret; + + if (copy_from_user(kaddr, buf, count)) + return -EFAULT; + kaddr[count] = '\0'; + ret = cd->cache_parse(cd, kaddr, count); + if (!ret) + ret = count; + return ret; +} -static ssize_t -cache_write(struct file *filp, const char __user *buf, size_t count, - loff_t *ppos) +static ssize_t cache_slow_downcall(const char __user *buf, + size_t count, struct cache_detail *cd) { - int err; - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + static char write_buf[8192]; /* protected by queue_io_mutex */ + ssize_t ret = -EINVAL; - if (count == 0) - return 0; if (count >= sizeof(write_buf)) - return -EINVAL; - + goto out; mutex_lock(&queue_io_mutex); + ret = cache_do_downcall(write_buf, buf, count, cd); + mutex_unlock(&queue_io_mutex); +out: + return ret; +} - if (copy_from_user(write_buf, buf, count)) { - mutex_unlock(&queue_io_mutex); - return -EFAULT; - } - write_buf[count] = '\0'; - if (cd->cache_parse) - err = cd->cache_parse(cd, write_buf, count); - else - err = -EINVAL; +static ssize_t cache_downcall(struct address_space *mapping, + const char __user *buf, + size_t count, struct cache_detail *cd) +{ + struct page *page; + char *kaddr; + ssize_t ret = -ENOMEM; + + if (count >= PAGE_CACHE_SIZE) + goto out_slow; + + page = find_or_create_page(mapping, 0, GFP_KERNEL); + if (!page) + goto out_slow; + + kaddr = kmap(page); + ret = cache_do_downcall(kaddr, buf, count, cd); + kunmap(page); + unlock_page(page); + page_cache_release(page); + return ret; +out_slow: + return cache_slow_downcall(buf, count, cd); +} - mutex_unlock(&queue_io_mutex); - return err ? err : count; +static ssize_t cache_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos, + struct cache_detail *cd) +{ + struct address_space *mapping = filp->f_mapping; + struct inode *inode = filp->f_path.dentry->d_inode; + ssize_t ret = -EINVAL; + + if (!cd->cache_parse) + goto out; + + mutex_lock(&inode->i_mutex); + ret = cache_downcall(mapping, buf, count, cd); + mutex_unlock(&inode->i_mutex); +out: + return ret; } static DECLARE_WAIT_QUEUE_HEAD(queue_wait); -static unsigned int -cache_poll(struct file *filp, poll_table *wait) +static unsigned int cache_poll(struct file *filp, poll_table *wait, + struct cache_detail *cd) { unsigned int mask; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; poll_wait(filp, &queue_wait, wait); @@ -822,14 +814,13 @@ cache_poll(struct file *filp, poll_table *wait) return mask; } -static int -cache_ioctl(struct inode *ino, struct file *filp, - unsigned int cmd, unsigned long arg) +static int cache_ioctl(struct inode *ino, struct file *filp, + unsigned int cmd, unsigned long arg, + struct cache_detail *cd) { int len = 0; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; - struct cache_detail *cd = PDE(ino)->data; if (cmd != FIONREAD || !rp) return -EINVAL; @@ -852,15 +843,15 @@ cache_ioctl(struct inode *ino, struct file *filp, return put_user(len, (int __user *)arg); } -static int -cache_open(struct inode *inode, struct file *filp) +static int cache_open(struct inode *inode, struct file *filp, + struct cache_detail *cd) { struct cache_reader *rp = NULL; + if (!cd || !try_module_get(cd->owner)) + return -EACCES; nonseekable_open(inode, filp); if (filp->f_mode & FMODE_READ) { - struct cache_detail *cd = PDE(inode)->data; - rp = kmalloc(sizeof(*rp), GFP_KERNEL); if (!rp) return -ENOMEM; @@ -875,11 +866,10 @@ cache_open(struct inode *inode, struct file *filp) return 0; } -static int -cache_release(struct inode *inode, struct file *filp) +static int cache_release(struct inode *inode, struct file *filp, + struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; - struct cache_detail *cd = PDE(inode)->data; if (rp) { spin_lock(&queue_lock); @@ -903,24 +893,13 @@ cache_release(struct inode *inode, struct file *filp) cd->last_close = get_seconds(); atomic_dec(&cd->readers); } + module_put(cd->owner); return 0; } -static const struct file_operations cache_file_operations = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = cache_read, - .write = cache_write, - .poll = cache_poll, - .ioctl = cache_ioctl, /* for FIONREAD */ - .open = cache_open, - .release = cache_release, -}; - - -static void queue_loose(struct cache_detail *detail, struct cache_head *ch) +static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch) { struct cache_queue *cq; spin_lock(&queue_lock); @@ -1020,15 +999,21 @@ static void warn_no_listener(struct cache_detail *detail) if (detail->last_warn != detail->last_close) { detail->last_warn = detail->last_close; if (detail->warn_no_listener) - detail->warn_no_listener(detail); + detail->warn_no_listener(detail, detail->last_close != 0); } } /* - * register an upcall request to user-space. + * register an upcall request to user-space and queue it up for read() by the + * upcall daemon. + * * Each request is at most one page long. */ -static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) +int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, + void (*cache_request)(struct cache_detail *, + struct cache_head *, + char **, + int *)) { char *buf; @@ -1036,9 +1021,6 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) char *bp; int len; - if (detail->cache_request == NULL) - return -EINVAL; - if (atomic_read(&detail->readers) == 0 && detail->last_close < get_seconds() - 30) { warn_no_listener(detail); @@ -1057,7 +1039,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) bp = buf; len = PAGE_SIZE; - detail->cache_request(detail, h, &bp, &len); + cache_request(detail, h, &bp, &len); if (len < 0) { kfree(buf); @@ -1075,6 +1057,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) wake_up(&queue_wait); return 0; } +EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); /* * parse a message from user-space and pass it @@ -1242,11 +1225,13 @@ static const struct seq_operations cache_content_op = { .show = c_show, }; -static int content_open(struct inode *inode, struct file *file) +static int content_open(struct inode *inode, struct file *file, + struct cache_detail *cd) { struct handle *han; - struct cache_detail *cd = PDE(inode)->data; + if (!cd || !try_module_get(cd->owner)) + return -EACCES; han = __seq_open_private(file, &cache_content_op, sizeof(*han)); if (han == NULL) return -ENOMEM; @@ -1255,17 +1240,33 @@ static int content_open(struct inode *inode, struct file *file) return 0; } -static const struct file_operations content_file_operations = { - .open = content_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; +static int content_release(struct inode *inode, struct file *file, + struct cache_detail *cd) +{ + int ret = seq_release_private(inode, file); + module_put(cd->owner); + return ret; +} + +static int open_flush(struct inode *inode, struct file *file, + struct cache_detail *cd) +{ + if (!cd || !try_module_get(cd->owner)) + return -EACCES; + return nonseekable_open(inode, file); +} + +static int release_flush(struct inode *inode, struct file *file, + struct cache_detail *cd) +{ + module_put(cd->owner); + return 0; +} static ssize_t read_flush(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos, + struct cache_detail *cd) { - struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; unsigned long p = *ppos; size_t len; @@ -1283,10 +1284,10 @@ static ssize_t read_flush(struct file *file, char __user *buf, return len; } -static ssize_t write_flush(struct file * file, const char __user * buf, - size_t count, loff_t *ppos) +static ssize_t write_flush(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, + struct cache_detail *cd) { - struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; char *ep; long flushtime; @@ -1307,8 +1308,343 @@ static ssize_t write_flush(struct file * file, const char __user * buf, return count; } -static const struct file_operations cache_flush_operations = { - .open = nonseekable_open, - .read = read_flush, - .write = write_flush, +static ssize_t cache_read_procfs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_read(filp, buf, count, ppos, cd); +} + +static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_write(filp, buf, count, ppos, cd); +} + +static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_poll(filp, wait, cd); +} + +static int cache_ioctl_procfs(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_ioctl(inode, filp, cmd, arg, cd); +} + +static int cache_open_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_open(inode, filp, cd); +} + +static int cache_release_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_release(inode, filp, cd); +} + +static const struct file_operations cache_file_operations_procfs = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = cache_read_procfs, + .write = cache_write_procfs, + .poll = cache_poll_procfs, + .ioctl = cache_ioctl_procfs, /* for FIONREAD */ + .open = cache_open_procfs, + .release = cache_release_procfs, +}; + +static int content_open_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return content_open(inode, filp, cd); +} + +static int content_release_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return content_release(inode, filp, cd); +} + +static const struct file_operations content_file_operations_procfs = { + .open = content_open_procfs, + .read = seq_read, + .llseek = seq_lseek, + .release = content_release_procfs, }; + +static int open_flush_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return open_flush(inode, filp, cd); +} + +static int release_flush_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return release_flush(inode, filp, cd); +} + +static ssize_t read_flush_procfs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return read_flush(filp, buf, count, ppos, cd); +} + +static ssize_t write_flush_procfs(struct file *filp, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return write_flush(filp, buf, count, ppos, cd); +} + +static const struct file_operations cache_flush_operations_procfs = { + .open = open_flush_procfs, + .read = read_flush_procfs, + .write = write_flush_procfs, + .release = release_flush_procfs, +}; + +static void remove_cache_proc_entries(struct cache_detail *cd) +{ + if (cd->u.procfs.proc_ent == NULL) + return; + if (cd->u.procfs.flush_ent) + remove_proc_entry("flush", cd->u.procfs.proc_ent); + if (cd->u.procfs.channel_ent) + remove_proc_entry("channel", cd->u.procfs.proc_ent); + if (cd->u.procfs.content_ent) + remove_proc_entry("content", cd->u.procfs.proc_ent); + cd->u.procfs.proc_ent = NULL; + remove_proc_entry(cd->name, proc_net_rpc); +} + +#ifdef CONFIG_PROC_FS +static int create_cache_proc_entries(struct cache_detail *cd) +{ + struct proc_dir_entry *p; + + cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc); + if (cd->u.procfs.proc_ent == NULL) + goto out_nomem; + cd->u.procfs.channel_ent = NULL; + cd->u.procfs.content_ent = NULL; + + p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &cache_flush_operations_procfs, cd); + cd->u.procfs.flush_ent = p; + if (p == NULL) + goto out_nomem; + + if (cd->cache_upcall || cd->cache_parse) { + p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &cache_file_operations_procfs, cd); + cd->u.procfs.channel_ent = p; + if (p == NULL) + goto out_nomem; + } + if (cd->cache_show) { + p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &content_file_operations_procfs, cd); + cd->u.procfs.content_ent = p; + if (p == NULL) + goto out_nomem; + } + return 0; +out_nomem: + remove_cache_proc_entries(cd); + return -ENOMEM; +} +#else /* CONFIG_PROC_FS */ +static int create_cache_proc_entries(struct cache_detail *cd) +{ + return 0; +} +#endif + +int cache_register(struct cache_detail *cd) +{ + int ret; + + sunrpc_init_cache_detail(cd); + ret = create_cache_proc_entries(cd); + if (ret) + sunrpc_destroy_cache_detail(cd); + return ret; +} +EXPORT_SYMBOL_GPL(cache_register); + +void cache_unregister(struct cache_detail *cd) +{ + remove_cache_proc_entries(cd); + sunrpc_destroy_cache_detail(cd); +} +EXPORT_SYMBOL_GPL(cache_unregister); + +static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_read(filp, buf, count, ppos, cd); +} + +static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_write(filp, buf, count, ppos, cd); +} + +static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_poll(filp, wait, cd); +} + +static int cache_ioctl_pipefs(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_ioctl(inode, filp, cmd, arg, cd); +} + +static int cache_open_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_open(inode, filp, cd); +} + +static int cache_release_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_release(inode, filp, cd); +} + +const struct file_operations cache_file_operations_pipefs = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = cache_read_pipefs, + .write = cache_write_pipefs, + .poll = cache_poll_pipefs, + .ioctl = cache_ioctl_pipefs, /* for FIONREAD */ + .open = cache_open_pipefs, + .release = cache_release_pipefs, +}; + +static int content_open_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return content_open(inode, filp, cd); +} + +static int content_release_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return content_release(inode, filp, cd); +} + +const struct file_operations content_file_operations_pipefs = { + .open = content_open_pipefs, + .read = seq_read, + .llseek = seq_lseek, + .release = content_release_pipefs, +}; + +static int open_flush_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return open_flush(inode, filp, cd); +} + +static int release_flush_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return release_flush(inode, filp, cd); +} + +static ssize_t read_flush_pipefs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return read_flush(filp, buf, count, ppos, cd); +} + +static ssize_t write_flush_pipefs(struct file *filp, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return write_flush(filp, buf, count, ppos, cd); +} + +const struct file_operations cache_flush_operations_pipefs = { + .open = open_flush_pipefs, + .read = read_flush_pipefs, + .write = write_flush_pipefs, + .release = release_flush_pipefs, +}; + +int sunrpc_cache_register_pipefs(struct dentry *parent, + const char *name, mode_t umode, + struct cache_detail *cd) +{ + struct qstr q; + struct dentry *dir; + int ret = 0; + + sunrpc_init_cache_detail(cd); + q.name = name; + q.len = strlen(name); + q.hash = full_name_hash(q.name, q.len); + dir = rpc_create_cache_dir(parent, &q, umode, cd); + if (!IS_ERR(dir)) + cd->u.pipefs.dir = dir; + else { + sunrpc_destroy_cache_detail(cd); + ret = PTR_ERR(dir); + } + return ret; +} +EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); + +void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) +{ + rpc_remove_cache_dir(cd->u.pipefs.dir); + cd->u.pipefs.dir = NULL; + sunrpc_destroy_cache_detail(cd); +} +EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); + diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index df1039f077c..38829e20500 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -27,6 +27,8 @@ #include <linux/types.h> #include <linux/kallsyms.h> #include <linux/mm.h> +#include <linux/namei.h> +#include <linux/mount.h> #include <linux/slab.h> #include <linux/utsname.h> #include <linux/workqueue.h> @@ -97,33 +99,49 @@ static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) { static uint32_t clntid; + struct nameidata nd; + struct path path; + char name[15]; + struct qstr q = { + .name = name, + }; int error; - clnt->cl_vfsmnt = ERR_PTR(-ENOENT); - clnt->cl_dentry = ERR_PTR(-ENOENT); + clnt->cl_path.mnt = ERR_PTR(-ENOENT); + clnt->cl_path.dentry = ERR_PTR(-ENOENT); if (dir_name == NULL) return 0; - clnt->cl_vfsmnt = rpc_get_mount(); - if (IS_ERR(clnt->cl_vfsmnt)) - return PTR_ERR(clnt->cl_vfsmnt); + path.mnt = rpc_get_mount(); + if (IS_ERR(path.mnt)) + return PTR_ERR(path.mnt); + error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &nd); + if (error) + goto err; for (;;) { - snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname), - "%s/clnt%x", dir_name, - (unsigned int)clntid++); - clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0'; - clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt); - if (!IS_ERR(clnt->cl_dentry)) - return 0; - error = PTR_ERR(clnt->cl_dentry); + q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); + name[sizeof(name) - 1] = '\0'; + q.hash = full_name_hash(q.name, q.len); + path.dentry = rpc_create_client_dir(nd.path.dentry, &q, clnt); + if (!IS_ERR(path.dentry)) + break; + error = PTR_ERR(path.dentry); if (error != -EEXIST) { - printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", - clnt->cl_pathname, error); - rpc_put_mount(); - return error; + printk(KERN_INFO "RPC: Couldn't create pipefs entry" + " %s/%s, error %d\n", + dir_name, name, error); + goto err_path_put; } } + path_put(&nd.path); + clnt->cl_path = path; + return 0; +err_path_put: + path_put(&nd.path); +err: + rpc_put_mount(); + return error; } static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) @@ -231,8 +249,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru return clnt; out_no_auth: - if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_dentry); + if (!IS_ERR(clnt->cl_path.dentry)) { + rpc_remove_client_dir(clnt->cl_path.dentry); rpc_put_mount(); } out_no_path: @@ -270,6 +288,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) .srcaddr = args->saddress, .dstaddr = args->address, .addrlen = args->addrsize, + .bc_xprt = args->bc_xprt, }; char servername[48]; @@ -423,8 +442,8 @@ rpc_free_client(struct kref *kref) dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_dentry); + if (!IS_ERR(clnt->cl_path.dentry)) { + rpc_remove_client_dir(clnt->cl_path.dentry); rpc_put_mount(); } if (clnt->cl_parent != clnt) { @@ -621,10 +640,11 @@ EXPORT_SYMBOL_GPL(rpc_call_async); /** * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run * rpc_execute against it - * @ops: RPC call ops + * @req: RPC request + * @tk_ops: RPC call ops */ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *tk_ops) + const struct rpc_call_ops *tk_ops) { struct rpc_task *task; struct xdr_buf *xbufp = &req->rq_snd_buf; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9ced0628d69..49278f83036 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -26,6 +26,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> +#include <linux/sunrpc/cache.h> static struct vfsmount *rpc_mount __read_mostly; static int rpc_mount_count; @@ -125,7 +126,7 @@ static void rpc_close_pipes(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); - struct rpc_pipe_ops *ops; + const struct rpc_pipe_ops *ops; int need_release; mutex_lock(&inode->i_mutex); @@ -398,66 +399,12 @@ static const struct file_operations rpc_info_operations = { /* - * We have a single directory with 1 node in it. - */ -enum { - RPCAUTH_Root = 1, - RPCAUTH_lockd, - RPCAUTH_mount, - RPCAUTH_nfs, - RPCAUTH_portmap, - RPCAUTH_statd, - RPCAUTH_nfsd4_cb, - RPCAUTH_RootEOF -}; - -/* * Description of fs contents. */ struct rpc_filelist { - char *name; + const char *name; const struct file_operations *i_fop; - int mode; -}; - -static struct rpc_filelist files[] = { - [RPCAUTH_lockd] = { - .name = "lockd", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_mount] = { - .name = "mount", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_nfs] = { - .name = "nfs", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_portmap] = { - .name = "portmap", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_statd] = { - .name = "statd", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_nfsd4_cb] = { - .name = "nfsd4_cb", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, -}; - -enum { - RPCAUTH_info = 2, - RPCAUTH_EOF -}; - -static struct rpc_filelist authfiles[] = { - [RPCAUTH_info] = { - .name = "info", - .i_fop = &rpc_info_operations, - .mode = S_IFREG | S_IRUSR, - }, + umode_t mode; }; struct vfsmount *rpc_get_mount(void) @@ -469,11 +416,13 @@ struct vfsmount *rpc_get_mount(void) return ERR_PTR(err); return rpc_mount; } +EXPORT_SYMBOL_GPL(rpc_get_mount); void rpc_put_mount(void) { simple_release_fs(&rpc_mount, &rpc_mount_count); } +EXPORT_SYMBOL_GPL(rpc_put_mount); static int rpc_delete_dentry(struct dentry *dentry) { @@ -484,39 +433,8 @@ static const struct dentry_operations rpc_dentry_operations = { .d_delete = rpc_delete_dentry, }; -static int -rpc_lookup_parent(char *path, struct nameidata *nd) -{ - struct vfsmount *mnt; - - if (path[0] == '\0') - return -ENOENT; - - mnt = rpc_get_mount(); - if (IS_ERR(mnt)) { - printk(KERN_WARNING "%s: %s failed to mount " - "pseudofilesystem \n", __FILE__, __func__); - return PTR_ERR(mnt); - } - - if (vfs_path_lookup(mnt->mnt_root, mnt, path, LOOKUP_PARENT, nd)) { - printk(KERN_WARNING "%s: %s failed to find path %s\n", - __FILE__, __func__, path); - rpc_put_mount(); - return -ENOENT; - } - return 0; -} - -static void -rpc_release_path(struct nameidata *nd) -{ - path_put(&nd->path); - rpc_put_mount(); -} - static struct inode * -rpc_get_inode(struct super_block *sb, int mode) +rpc_get_inode(struct super_block *sb, umode_t mode) { struct inode *inode = new_inode(sb); if (!inode) @@ -534,212 +452,274 @@ rpc_get_inode(struct super_block *sb, int mode) return inode; } -/* - * FIXME: This probably has races. - */ -static void rpc_depopulate(struct dentry *parent, - unsigned long start, unsigned long eof) +static int __rpc_create_common(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private) { - struct inode *dir = parent->d_inode; - struct list_head *pos, *next; - struct dentry *dentry, *dvec[10]; - int n = 0; + struct inode *inode; - mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); -repeat: - spin_lock(&dcache_lock); - list_for_each_safe(pos, next, &parent->d_subdirs) { - dentry = list_entry(pos, struct dentry, d_u.d_child); - if (!dentry->d_inode || - dentry->d_inode->i_ino < start || - dentry->d_inode->i_ino >= eof) - continue; - spin_lock(&dentry->d_lock); - if (!d_unhashed(dentry)) { - dget_locked(dentry); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); - dvec[n++] = dentry; - if (n == ARRAY_SIZE(dvec)) - break; - } else - spin_unlock(&dentry->d_lock); - } - spin_unlock(&dcache_lock); - if (n) { - do { - dentry = dvec[--n]; - if (S_ISREG(dentry->d_inode->i_mode)) - simple_unlink(dir, dentry); - else if (S_ISDIR(dentry->d_inode->i_mode)) - simple_rmdir(dir, dentry); - d_delete(dentry); - dput(dentry); - } while (n); - goto repeat; - } - mutex_unlock(&dir->i_mutex); + BUG_ON(!d_unhashed(dentry)); + inode = rpc_get_inode(dir->i_sb, mode); + if (!inode) + goto out_err; + inode->i_ino = iunique(dir->i_sb, 100); + if (i_fop) + inode->i_fop = i_fop; + if (private) + rpc_inode_setowner(inode, private); + d_add(dentry, inode); + return 0; +out_err: + printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", + __FILE__, __func__, dentry->d_name.name); + dput(dentry); + return -ENOMEM; } -static int -rpc_populate(struct dentry *parent, - struct rpc_filelist *files, - int start, int eof) +static int __rpc_create(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private) { - struct inode *inode, *dir = parent->d_inode; - void *private = RPC_I(dir)->private; - struct dentry *dentry; - int mode, i; + int err; - mutex_lock(&dir->i_mutex); - for (i = start; i < eof; i++) { - dentry = d_alloc_name(parent, files[i].name); - if (!dentry) - goto out_bad; - dentry->d_op = &rpc_dentry_operations; - mode = files[i].mode; - inode = rpc_get_inode(dir->i_sb, mode); - if (!inode) { - dput(dentry); - goto out_bad; - } - inode->i_ino = i; - if (files[i].i_fop) - inode->i_fop = files[i].i_fop; - if (private) - rpc_inode_setowner(inode, private); - if (S_ISDIR(mode)) - inc_nlink(dir); - d_add(dentry, inode); - fsnotify_create(dir, dentry); - } - mutex_unlock(&dir->i_mutex); + err = __rpc_create_common(dir, dentry, S_IFREG | mode, i_fop, private); + if (err) + return err; + fsnotify_create(dir, dentry); return 0; -out_bad: - mutex_unlock(&dir->i_mutex); - printk(KERN_WARNING "%s: %s failed to populate directory %s\n", - __FILE__, __func__, parent->d_name.name); - return -ENOMEM; } -static int -__rpc_mkdir(struct inode *dir, struct dentry *dentry) +static int __rpc_mkdir(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private) { - struct inode *inode; + int err; - inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUGO | S_IXUGO); - if (!inode) - goto out_err; - inode->i_ino = iunique(dir->i_sb, 100); - d_instantiate(dentry, inode); + err = __rpc_create_common(dir, dentry, S_IFDIR | mode, i_fop, private); + if (err) + return err; inc_nlink(dir); fsnotify_mkdir(dir, dentry); return 0; -out_err: - printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", - __FILE__, __func__, dentry->d_name.name); - return -ENOMEM; } -static int -__rpc_rmdir(struct inode *dir, struct dentry *dentry) +static int __rpc_mkpipe(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private, + const struct rpc_pipe_ops *ops, + int flags) { - int error; - error = simple_rmdir(dir, dentry); - if (!error) - d_delete(dentry); - return error; + struct rpc_inode *rpci; + int err; + + err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private); + if (err) + return err; + rpci = RPC_I(dentry->d_inode); + rpci->nkern_readwriters = 1; + rpci->private = private; + rpci->flags = flags; + rpci->ops = ops; + fsnotify_create(dir, dentry); + return 0; } -static struct dentry * -rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusive) +static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) +{ + int ret; + + dget(dentry); + ret = simple_rmdir(dir, dentry); + d_delete(dentry); + dput(dentry); + return ret; +} + +static int __rpc_unlink(struct inode *dir, struct dentry *dentry) +{ + int ret; + + dget(dentry); + ret = simple_unlink(dir, dentry); + d_delete(dentry); + dput(dentry); + return ret; +} + +static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); + + rpci->nkern_readwriters--; + if (rpci->nkern_readwriters != 0) + return 0; + rpc_close_pipes(inode); + return __rpc_unlink(dir, dentry); +} + +static struct dentry *__rpc_lookup_create(struct dentry *parent, + struct qstr *name) { - struct inode *dir = parent->d_inode; struct dentry *dentry; - mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(name, parent, len); - if (IS_ERR(dentry)) - goto out_err; + dentry = d_lookup(parent, name); + if (!dentry) { + dentry = d_alloc(parent, name); + if (!dentry) { + dentry = ERR_PTR(-ENOMEM); + goto out_err; + } + } if (!dentry->d_inode) dentry->d_op = &rpc_dentry_operations; - else if (exclusive) { - dput(dentry); - dentry = ERR_PTR(-EEXIST); - goto out_err; - } - return dentry; out_err: - mutex_unlock(&dir->i_mutex); return dentry; } -static struct dentry * -rpc_lookup_negative(char *path, struct nameidata *nd) +static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, + struct qstr *name) { struct dentry *dentry; - int error; - if ((error = rpc_lookup_parent(path, nd)) != 0) - return ERR_PTR(error); - dentry = rpc_lookup_create(nd->path.dentry, nd->last.name, nd->last.len, - 1); - if (IS_ERR(dentry)) - rpc_release_path(nd); - return dentry; + dentry = __rpc_lookup_create(parent, name); + if (dentry->d_inode == NULL) + return dentry; + dput(dentry); + return ERR_PTR(-EEXIST); } -/** - * rpc_mkdir - Create a new directory in rpc_pipefs - * @path: path from the rpc_pipefs root to the new directory - * @rpc_client: rpc client to associate with this directory - * - * This creates a directory at the given @path associated with - * @rpc_clnt, which will contain a file named "info" with some basic - * information about the client, together with any "pipes" that may - * later be created using rpc_mkpipe(). +/* + * FIXME: This probably has races. */ -struct dentry * -rpc_mkdir(char *path, struct rpc_clnt *rpc_client) +static void __rpc_depopulate(struct dentry *parent, + const struct rpc_filelist *files, + int start, int eof) { - struct nameidata nd; + struct inode *dir = parent->d_inode; struct dentry *dentry; - struct inode *dir; + struct qstr name; + int i; + + for (i = start; i < eof; i++) { + name.name = files[i].name; + name.len = strlen(files[i].name); + name.hash = full_name_hash(name.name, name.len); + dentry = d_lookup(parent, &name); + + if (dentry == NULL) + continue; + if (dentry->d_inode == NULL) + goto next; + switch (dentry->d_inode->i_mode & S_IFMT) { + default: + BUG(); + case S_IFREG: + __rpc_unlink(dir, dentry); + break; + case S_IFDIR: + __rpc_rmdir(dir, dentry); + } +next: + dput(dentry); + } +} + +static void rpc_depopulate(struct dentry *parent, + const struct rpc_filelist *files, + int start, int eof) +{ + struct inode *dir = parent->d_inode; + + mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); + __rpc_depopulate(parent, files, start, eof); + mutex_unlock(&dir->i_mutex); +} + +static int rpc_populate(struct dentry *parent, + const struct rpc_filelist *files, + int start, int eof, + void *private) +{ + struct inode *dir = parent->d_inode; + struct dentry *dentry; + int i, err; + + mutex_lock(&dir->i_mutex); + for (i = start; i < eof; i++) { + struct qstr q; + + q.name = files[i].name; + q.len = strlen(files[i].name); + q.hash = full_name_hash(q.name, q.len); + dentry = __rpc_lookup_create_exclusive(parent, &q); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out_bad; + switch (files[i].mode & S_IFMT) { + default: + BUG(); + case S_IFREG: + err = __rpc_create(dir, dentry, + files[i].mode, + files[i].i_fop, + private); + break; + case S_IFDIR: + err = __rpc_mkdir(dir, dentry, + files[i].mode, + NULL, + private); + } + if (err != 0) + goto out_bad; + } + mutex_unlock(&dir->i_mutex); + return 0; +out_bad: + __rpc_depopulate(parent, files, start, eof); + mutex_unlock(&dir->i_mutex); + printk(KERN_WARNING "%s: %s failed to populate directory %s\n", + __FILE__, __func__, parent->d_name.name); + return err; +} + +static struct dentry *rpc_mkdir_populate(struct dentry *parent, + struct qstr *name, umode_t mode, void *private, + int (*populate)(struct dentry *, void *), void *args_populate) +{ + struct dentry *dentry; + struct inode *dir = parent->d_inode; int error; - dentry = rpc_lookup_negative(path, &nd); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + dentry = __rpc_lookup_create_exclusive(parent, name); if (IS_ERR(dentry)) - return dentry; - dir = nd.path.dentry->d_inode; - if ((error = __rpc_mkdir(dir, dentry)) != 0) - goto err_dput; - RPC_I(dentry->d_inode)->private = rpc_client; - error = rpc_populate(dentry, authfiles, - RPCAUTH_info, RPCAUTH_EOF); - if (error) - goto err_depopulate; - dget(dentry); + goto out; + error = __rpc_mkdir(dir, dentry, mode, NULL, private); + if (error != 0) + goto out_err; + if (populate != NULL) { + error = populate(dentry, args_populate); + if (error) + goto err_rmdir; + } out: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); return dentry; -err_depopulate: - rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); +err_rmdir: __rpc_rmdir(dir, dentry); -err_dput: - dput(dentry); - printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %d)\n", - __FILE__, __func__, path, error); +out_err: dentry = ERR_PTR(error); goto out; } -/** - * rpc_rmdir - Remove a directory created with rpc_mkdir() - * @dentry: directory to remove - */ -int -rpc_rmdir(struct dentry *dentry) +static int rpc_rmdir_depopulate(struct dentry *dentry, + void (*depopulate)(struct dentry *)) { struct dentry *parent; struct inode *dir; @@ -748,9 +728,9 @@ rpc_rmdir(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); + if (depopulate != NULL) + depopulate(dentry); error = __rpc_rmdir(dir, dentry); - dput(dentry); mutex_unlock(&dir->i_mutex); dput(parent); return error; @@ -776,50 +756,54 @@ rpc_rmdir(struct dentry *dentry) * The @private argument passed here will be available to all these methods * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private. */ -struct dentry * -rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags) +struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, + void *private, const struct rpc_pipe_ops *ops, + int flags) { struct dentry *dentry; - struct inode *dir, *inode; - struct rpc_inode *rpci; + struct inode *dir = parent->d_inode; + umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR; + struct qstr q; + int err; + + if (ops->upcall == NULL) + umode &= ~S_IRUGO; + if (ops->downcall == NULL) + umode &= ~S_IWUGO; - dentry = rpc_lookup_create(parent, name, strlen(name), 0); + q.name = name; + q.len = strlen(name); + q.hash = full_name_hash(q.name, q.len), + + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + dentry = __rpc_lookup_create(parent, &q); if (IS_ERR(dentry)) - return dentry; - dir = parent->d_inode; + goto out; if (dentry->d_inode) { - rpci = RPC_I(dentry->d_inode); + struct rpc_inode *rpci = RPC_I(dentry->d_inode); if (rpci->private != private || rpci->ops != ops || rpci->flags != flags) { dput (dentry); - dentry = ERR_PTR(-EBUSY); + err = -EBUSY; + goto out_err; } rpci->nkern_readwriters++; goto out; } - inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); - if (!inode) - goto err_dput; - inode->i_ino = iunique(dir->i_sb, 100); - inode->i_fop = &rpc_pipe_fops; - d_instantiate(dentry, inode); - rpci = RPC_I(inode); - rpci->private = private; - rpci->flags = flags; - rpci->ops = ops; - rpci->nkern_readwriters = 1; - fsnotify_create(dir, dentry); - dget(dentry); + + err = __rpc_mkpipe(dir, dentry, umode, &rpc_pipe_fops, + private, ops, flags); + if (err) + goto out_err; out: mutex_unlock(&dir->i_mutex); return dentry; -err_dput: - dput(dentry); - dentry = ERR_PTR(-ENOMEM); +out_err: + dentry = ERR_PTR(err); printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n", __FILE__, __func__, parent->d_name.name, name, - -ENOMEM); + err); goto out; } EXPORT_SYMBOL_GPL(rpc_mkpipe); @@ -842,23 +826,112 @@ rpc_unlink(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) { - rpc_close_pipes(dentry->d_inode); - error = simple_unlink(dir, dentry); - if (!error) - d_delete(dentry); - } - dput(dentry); + error = __rpc_rmpipe(dir, dentry); mutex_unlock(&dir->i_mutex); dput(parent); return error; } EXPORT_SYMBOL_GPL(rpc_unlink); +enum { + RPCAUTH_info, + RPCAUTH_EOF +}; + +static const struct rpc_filelist authfiles[] = { + [RPCAUTH_info] = { + .name = "info", + .i_fop = &rpc_info_operations, + .mode = S_IFREG | S_IRUSR, + }, +}; + +static int rpc_clntdir_populate(struct dentry *dentry, void *private) +{ + return rpc_populate(dentry, + authfiles, RPCAUTH_info, RPCAUTH_EOF, + private); +} + +static void rpc_clntdir_depopulate(struct dentry *dentry) +{ + rpc_depopulate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF); +} + +/** + * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs + * @dentry: dentry from the rpc_pipefs root to the new directory + * @name: &struct qstr for the name + * @rpc_client: rpc client to associate with this directory + * + * This creates a directory at the given @path associated with + * @rpc_clnt, which will contain a file named "info" with some basic + * information about the client, together with any "pipes" that may + * later be created using rpc_mkpipe(). + */ +struct dentry *rpc_create_client_dir(struct dentry *dentry, + struct qstr *name, + struct rpc_clnt *rpc_client) +{ + return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL, + rpc_clntdir_populate, rpc_client); +} + +/** + * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() + * @dentry: directory to remove + */ +int rpc_remove_client_dir(struct dentry *dentry) +{ + return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate); +} + +static const struct rpc_filelist cache_pipefs_files[3] = { + [0] = { + .name = "channel", + .i_fop = &cache_file_operations_pipefs, + .mode = S_IFREG|S_IRUSR|S_IWUSR, + }, + [1] = { + .name = "content", + .i_fop = &content_file_operations_pipefs, + .mode = S_IFREG|S_IRUSR, + }, + [2] = { + .name = "flush", + .i_fop = &cache_flush_operations_pipefs, + .mode = S_IFREG|S_IRUSR|S_IWUSR, + }, +}; + +static int rpc_cachedir_populate(struct dentry *dentry, void *private) +{ + return rpc_populate(dentry, + cache_pipefs_files, 0, 3, + private); +} + +static void rpc_cachedir_depopulate(struct dentry *dentry) +{ + rpc_depopulate(dentry, cache_pipefs_files, 0, 3); +} + +struct dentry *rpc_create_cache_dir(struct dentry *parent, struct qstr *name, + mode_t umode, struct cache_detail *cd) +{ + return rpc_mkdir_populate(parent, name, umode, NULL, + rpc_cachedir_populate, cd); +} + +void rpc_remove_cache_dir(struct dentry *dentry) +{ + rpc_rmdir_depopulate(dentry, rpc_cachedir_depopulate); +} + /* * populate the filesystem */ -static struct super_operations s_ops = { +static const struct super_operations s_ops = { .alloc_inode = rpc_alloc_inode, .destroy_inode = rpc_destroy_inode, .statfs = simple_statfs, @@ -866,6 +939,51 @@ static struct super_operations s_ops = { #define RPCAUTH_GSSMAGIC 0x67596969 +/* + * We have a single directory with 1 node in it. + */ +enum { + RPCAUTH_lockd, + RPCAUTH_mount, + RPCAUTH_nfs, + RPCAUTH_portmap, + RPCAUTH_statd, + RPCAUTH_nfsd4_cb, + RPCAUTH_cache, + RPCAUTH_RootEOF +}; + +static const struct rpc_filelist files[] = { + [RPCAUTH_lockd] = { + .name = "lockd", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_mount] = { + .name = "mount", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_nfs] = { + .name = "nfs", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_portmap] = { + .name = "portmap", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_statd] = { + .name = "statd", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_nfsd4_cb] = { + .name = "nfsd4_cb", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_cache] = { + .name = "cache", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, +}; + static int rpc_fill_super(struct super_block *sb, void *data, int silent) { @@ -886,7 +1004,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) iput(inode); return -ENOMEM; } - if (rpc_populate(root, files, RPCAUTH_Root + 1, RPCAUTH_RootEOF)) + if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) goto out; sb->s_root = root; return 0; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index beee6da3303..830faf4d999 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -75,6 +75,37 @@ enum { #define RPCB_OWNER_STRING "0" #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) +/* + * XDR data type sizes + */ +#define RPCB_program_sz (1) +#define RPCB_version_sz (1) +#define RPCB_protocol_sz (1) +#define RPCB_port_sz (1) +#define RPCB_boolean_sz (1) + +#define RPCB_netid_sz (1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) +#define RPCB_addr_sz (1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) +#define RPCB_ownerstring_sz (1 + XDR_QUADLEN(RPCB_MAXOWNERLEN)) + +/* + * XDR argument and result sizes + */ +#define RPCB_mappingargs_sz (RPCB_program_sz + RPCB_version_sz + \ + RPCB_protocol_sz + RPCB_port_sz) +#define RPCB_getaddrargs_sz (RPCB_program_sz + RPCB_version_sz + \ + RPCB_netid_sz + RPCB_addr_sz + \ + RPCB_ownerstring_sz) + +#define RPCB_getportres_sz RPCB_port_sz +#define RPCB_setres_sz RPCB_boolean_sz + +/* + * Note that RFC 1833 does not put any size restrictions on the + * address string returned by the remote rpcbind database. + */ +#define RPCB_getaddrres_sz RPCB_addr_sz + static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; @@ -122,6 +153,7 @@ static void rpcb_map_release(void *data) rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status); xprt_put(map->r_xprt); + kfree(map->r_addr); kfree(map); } @@ -268,12 +300,9 @@ static int rpcb_register_inet4(const struct sockaddr *sap, const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); - char buf[32]; + int result; - /* Construct AF_INET universal address */ - snprintf(buf, sizeof(buf), "%pI4.%u.%u", - &sin->sin_addr.s_addr, port >> 8, port & 0xff); - map->r_addr = buf; + map->r_addr = rpc_sockaddr2uaddr(sap); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -284,7 +313,9 @@ static int rpcb_register_inet4(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(RPCBVERS_4, msg); + kfree(map->r_addr); + return result; } /* @@ -296,16 +327,9 @@ static int rpcb_register_inet6(const struct sockaddr *sap, const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); - char buf[64]; + int result; - /* Construct AF_INET6 universal address */ - if (ipv6_addr_any(&sin6->sin6_addr)) - snprintf(buf, sizeof(buf), "::.%u.%u", - port >> 8, port & 0xff); - else - snprintf(buf, sizeof(buf), "%pI6.%u.%u", - &sin6->sin6_addr, port >> 8, port & 0xff); - map->r_addr = buf; + map->r_addr = rpc_sockaddr2uaddr(sap); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -316,7 +340,9 @@ static int rpcb_register_inet6(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(RPCBVERS_4, msg); + kfree(map->r_addr); + return result; } static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) @@ -428,7 +454,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) struct rpc_message msg = { .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], .rpc_argp = &map, - .rpc_resp = &map.r_port, + .rpc_resp = &map, }; struct rpc_clnt *rpcb_clnt; int status; @@ -458,7 +484,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi struct rpc_message msg = { .rpc_proc = proc, .rpc_argp = map, - .rpc_resp = &map->r_port, + .rpc_resp = map, }; struct rpc_task_setup task_setup_data = { .rpc_client = rpcb_clnt, @@ -539,6 +565,7 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } + /* Parent transport's destination address */ salen = rpc_peeraddr(clnt, sap, sizeof(addr)); /* Don't ever use rpcbind v2 for AF_INET6 requests */ @@ -589,11 +616,22 @@ void rpcb_getport_async(struct rpc_task *task) map->r_prot = xprt->prot; map->r_port = 0; map->r_xprt = xprt_get(xprt); - map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); - map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); - map->r_owner = ""; map->r_status = -EIO; + switch (bind_version) { + case RPCBVERS_4: + case RPCBVERS_3: + map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); + map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_owner = ""; + break; + case RPCBVERS_2: + map->r_addr = NULL; + break; + default: + BUG(); + } + child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { @@ -656,176 +694,278 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) * XDR functions for rpcbind */ -static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, - struct rpcbind_args *rpcb) +static int rpcb_enc_mapping(struct rpc_rqst *req, __be32 *p, + const struct rpcbind_args *rpcb) { - dprintk("RPC: encoding rpcb request (%u, %u, %d, %u)\n", + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; + + dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n", + task->tk_pid, task->tk_msg.rpc_proc->p_name, rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + + p = xdr_reserve_space(&xdr, sizeof(__be32) * RPCB_mappingargs_sz); + if (unlikely(p == NULL)) + return -EIO; + *p++ = htonl(rpcb->r_prog); *p++ = htonl(rpcb->r_vers); *p++ = htonl(rpcb->r_prot); - *p++ = htonl(rpcb->r_port); + *p = htonl(rpcb->r_port); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } -static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, - unsigned short *portp) +static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p, + struct rpcbind_args *rpcb) { - *portp = (unsigned short) ntohl(*p++); - dprintk("RPC: rpcb getport result: %u\n", - *portp); + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; + unsigned long port; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + + rpcb->r_port = 0; + + p = xdr_inline_decode(&xdr, sizeof(__be32)); + if (unlikely(p == NULL)) + return -EIO; + + port = ntohl(*p); + dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid, + task->tk_msg.rpc_proc->p_name, port); + if (unlikely(port > USHORT_MAX)) + return -EIO; + + rpcb->r_port = port; return 0; } -static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, - unsigned int *boolp) +static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p, + unsigned int *boolp) { - *boolp = (unsigned int) ntohl(*p++); - dprintk("RPC: rpcb set/unset call %s\n", + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + + p = xdr_inline_decode(&xdr, sizeof(__be32)); + if (unlikely(p == NULL)) + return -EIO; + + *boolp = 0; + if (*p) + *boolp = 1; + + dprintk("RPC: %5u RPCB_%s call %s\n", + task->tk_pid, task->tk_msg.rpc_proc->p_name, (*boolp ? "succeeded" : "failed")); return 0; } -static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, - struct rpcbind_args *rpcb) +static int encode_rpcb_string(struct xdr_stream *xdr, const char *string, + const u32 maxstrlen) { - dprintk("RPC: encoding rpcb request (%u, %u, %s)\n", - rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); - *p++ = htonl(rpcb->r_prog); - *p++ = htonl(rpcb->r_vers); + u32 len; + __be32 *p; - p = xdr_encode_string(p, rpcb->r_netid); - p = xdr_encode_string(p, rpcb->r_addr); - p = xdr_encode_string(p, rpcb->r_owner); + if (unlikely(string == NULL)) + return -EIO; + len = strlen(string); + if (unlikely(len > maxstrlen)) + return -EIO; - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + p = xdr_reserve_space(xdr, sizeof(__be32) + len); + if (unlikely(p == NULL)) + return -EIO; + xdr_encode_opaque(p, string, len); return 0; } -static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, - unsigned short *portp) +static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p, + const struct rpcbind_args *rpcb) { - char *addr; - u32 addr_len; - int c, i, f, first, val; + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; - *portp = 0; - addr_len = ntohl(*p++); + dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n", + task->tk_pid, task->tk_msg.rpc_proc->p_name, + rpcb->r_prog, rpcb->r_vers, + rpcb->r_netid, rpcb->r_addr); - if (addr_len == 0) { - dprintk("RPC: rpcb_decode_getaddr: " - "service is not registered\n"); - return 0; - } + xdr_init_encode(&xdr, &req->rq_snd_buf, p); - /* - * Simple sanity check. - */ - if (addr_len > RPCBIND_MAXUADDRLEN) - goto out_err; - - /* - * Start at the end and walk backwards until the first dot - * is encountered. When the second dot is found, we have - * both parts of the port number. - */ - addr = (char *)p; - val = 0; - first = 1; - f = 1; - for (i = addr_len - 1; i > 0; i--) { - c = addr[i]; - if (c >= '0' && c <= '9') { - val += (c - '0') * f; - f *= 10; - } else if (c == '.') { - if (first) { - *portp = val; - val = first = 0; - f = 1; - } else { - *portp |= (val << 8); - break; - } - } - } + p = xdr_reserve_space(&xdr, + sizeof(__be32) * (RPCB_program_sz + RPCB_version_sz)); + if (unlikely(p == NULL)) + return -EIO; + *p++ = htonl(rpcb->r_prog); + *p = htonl(rpcb->r_vers); - /* - * Simple sanity check. If we never saw a dot in the reply, - * then this was probably just garbage. - */ - if (first) - goto out_err; + if (encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN)) + return -EIO; + if (encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN)) + return -EIO; + if (encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN)) + return -EIO; - dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp); return 0; - -out_err: - dprintk("RPC: rpcbind server returned malformed reply\n"); - return -EIO; } -#define RPCB_program_sz (1u) -#define RPCB_version_sz (1u) -#define RPCB_protocol_sz (1u) -#define RPCB_port_sz (1u) -#define RPCB_boolean_sz (1u) +static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p, + struct rpcbind_args *rpcb) +{ + struct sockaddr_storage address; + struct sockaddr *sap = (struct sockaddr *)&address; + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; + u32 len; -#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) -#define RPCB_addr_sz (1+XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) -#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) + rpcb->r_port = 0; -#define RPCB_mappingargs_sz RPCB_program_sz+RPCB_version_sz+ \ - RPCB_protocol_sz+RPCB_port_sz -#define RPCB_getaddrargs_sz RPCB_program_sz+RPCB_version_sz+ \ - RPCB_netid_sz+RPCB_addr_sz+ \ - RPCB_ownerstring_sz + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); -#define RPCB_setres_sz RPCB_boolean_sz -#define RPCB_getportres_sz RPCB_port_sz - -/* - * Note that RFC 1833 does not put any size restrictions on the - * address string returned by the remote rpcbind database. - */ -#define RPCB_getaddrres_sz RPCB_addr_sz + p = xdr_inline_decode(&xdr, sizeof(__be32)); + if (unlikely(p == NULL)) + goto out_fail; + len = ntohl(*p); -#define PROC(proc, argtype, restype) \ - [RPCBPROC_##proc] = { \ - .p_proc = RPCBPROC_##proc, \ - .p_encode = (kxdrproc_t) rpcb_encode_##argtype, \ - .p_decode = (kxdrproc_t) rpcb_decode_##restype, \ - .p_arglen = RPCB_##argtype##args_sz, \ - .p_replen = RPCB_##restype##res_sz, \ - .p_statidx = RPCBPROC_##proc, \ - .p_timer = 0, \ - .p_name = #proc, \ + /* + * If the returned universal address is a null string, + * the requested RPC service was not registered. + */ + if (len == 0) { + dprintk("RPC: %5u RPCB reply: program not registered\n", + task->tk_pid); + return 0; } + if (unlikely(len > RPCBIND_MAXUADDRLEN)) + goto out_fail; + + p = xdr_inline_decode(&xdr, len); + if (unlikely(p == NULL)) + goto out_fail; + dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid, + task->tk_msg.rpc_proc->p_name, (char *)p); + + if (rpc_uaddr2sockaddr((char *)p, len, sap, sizeof(address)) == 0) + goto out_fail; + rpcb->r_port = rpc_get_port(sap); + + return 0; + +out_fail: + dprintk("RPC: %5u malformed RPCB_%s reply\n", + task->tk_pid, task->tk_msg.rpc_proc->p_name); + return -EIO; +} + /* * Not all rpcbind procedures described in RFC 1833 are implemented * since the Linux kernel RPC code requires only these. */ + static struct rpc_procinfo rpcb_procedures2[] = { - PROC(SET, mapping, set), - PROC(UNSET, mapping, set), - PROC(GETPORT, mapping, getport), + [RPCBPROC_SET] = { + .p_proc = RPCBPROC_SET, + .p_encode = (kxdrproc_t)rpcb_enc_mapping, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_mappingargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_SET, + .p_timer = 0, + .p_name = "SET", + }, + [RPCBPROC_UNSET] = { + .p_proc = RPCBPROC_UNSET, + .p_encode = (kxdrproc_t)rpcb_enc_mapping, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_mappingargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_UNSET, + .p_timer = 0, + .p_name = "UNSET", + }, + [RPCBPROC_GETPORT] = { + .p_proc = RPCBPROC_GETPORT, + .p_encode = (kxdrproc_t)rpcb_enc_mapping, + .p_decode = (kxdrproc_t)rpcb_dec_getport, + .p_arglen = RPCB_mappingargs_sz, + .p_replen = RPCB_getportres_sz, + .p_statidx = RPCBPROC_GETPORT, + .p_timer = 0, + .p_name = "GETPORT", + }, }; static struct rpc_procinfo rpcb_procedures3[] = { - PROC(SET, getaddr, set), - PROC(UNSET, getaddr, set), - PROC(GETADDR, getaddr, getaddr), + [RPCBPROC_SET] = { + .p_proc = RPCBPROC_SET, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_SET, + .p_timer = 0, + .p_name = "SET", + }, + [RPCBPROC_UNSET] = { + .p_proc = RPCBPROC_UNSET, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_UNSET, + .p_timer = 0, + .p_name = "UNSET", + }, + [RPCBPROC_GETADDR] = { + .p_proc = RPCBPROC_GETADDR, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_getaddr, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_getaddrres_sz, + .p_statidx = RPCBPROC_GETADDR, + .p_timer = 0, + .p_name = "GETADDR", + }, }; static struct rpc_procinfo rpcb_procedures4[] = { - PROC(SET, getaddr, set), - PROC(UNSET, getaddr, set), - PROC(GETADDR, getaddr, getaddr), - PROC(GETVERSADDR, getaddr, getaddr), + [RPCBPROC_SET] = { + .p_proc = RPCBPROC_SET, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_SET, + .p_timer = 0, + .p_name = "SET", + }, + [RPCBPROC_UNSET] = { + .p_proc = RPCBPROC_UNSET, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_UNSET, + .p_timer = 0, + .p_name = "UNSET", + }, + [RPCBPROC_GETADDR] = { + .p_proc = RPCBPROC_GETADDR, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_getaddr, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_getaddrres_sz, + .p_statidx = RPCBPROC_GETADDR, + .p_timer = 0, + .p_name = "GETADDR", + }, }; static struct rpcb_info rpcb_next_version[] = { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 8f459abe97c..cef74ba0666 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -21,6 +21,8 @@ #include <linux/sunrpc/clnt.h> +#include "sunrpc.h" + #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_SCHED #define RPC_TASK_MAGIC_ID 0xf00baa @@ -711,11 +713,6 @@ static void rpc_async_schedule(struct work_struct *work) __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); } -struct rpc_buffer { - size_t len; - char data[]; -}; - /** * rpc_malloc - allocate an RPC buffer * @task: RPC task that will use this buffer diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index 5d9dd742264..90c292e2738 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -27,11 +27,25 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef _NET_SUNRPC_SUNRPC_H #define _NET_SUNRPC_SUNRPC_H +#include <linux/net.h> + +/* + * Header for dynamically allocated rpc buffers. + */ +struct rpc_buffer { + size_t len; + char data[]; +}; + static inline int rpc_reply_expected(struct rpc_task *task) { return (task->tk_msg.rpc_proc != NULL) && (task->tk_msg.rpc_proc->p_decode != NULL); } +int svc_send_common(struct socket *sock, struct xdr_buf *xdr, + struct page *headpage, unsigned long headoffset, + struct page *tailpage, unsigned long tailoffset); + #endif /* _NET_SUNRPC_SUNRPC_H */ diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index adaa81982f7..8cce9218901 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -69,5 +69,5 @@ cleanup_sunrpc(void) rcu_barrier(); /* Wait for completion of call_rcu()'s */ } MODULE_LICENSE("GPL"); -module_init(init_sunrpc); +fs_initcall(init_sunrpc); /* Ensure we're initialised before nfs */ module_exit(cleanup_sunrpc); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 952f206ff30..538ca433a56 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1103,8 +1103,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) procp->pc_release(rqstp, NULL, rqstp->rq_resp); goto dropit; } - if (*statp == rpc_success && (xdr = procp->pc_encode) - && !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) { + if (*statp == rpc_success && + (xdr = procp->pc_encode) && + !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) { dprintk("svc: failed to encode reply\n"); /* serv->sv_stats->rpcsystemerr++; */ *statp = rpc_system_err; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 27d44332f01..b845e2293df 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -129,8 +129,8 @@ static void svc_xprt_free(struct kref *kref) struct svc_xprt *xprt = container_of(kref, struct svc_xprt, xpt_ref); struct module *owner = xprt->xpt_class->xcl_owner; - if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) - && xprt->xpt_auth_cache != NULL) + if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) && + xprt->xpt_auth_cache != NULL) svcauth_unix_info_release(xprt->xpt_auth_cache); xprt->xpt_ops->xpo_free(xprt); module_put(owner); @@ -160,6 +160,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, mutex_init(&xprt->xpt_mutex); spin_lock_init(&xprt->xpt_lock); set_bit(XPT_BUSY, &xprt->xpt_flags); + rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); } EXPORT_SYMBOL_GPL(svc_xprt_init); @@ -710,10 +711,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&pool->sp_lock); len = 0; - if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { - dprintk("svc_recv: found XPT_CLOSE\n"); - svc_delete_xprt(xprt); - } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { struct svc_xprt *newxpt; newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) { @@ -739,7 +737,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) svc_xprt_received(newxpt); } svc_xprt_received(xprt); - } else { + } else if (!test_bit(XPT_CLOSE, &xprt->xpt_flags)) { dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, pool->sp_id, xprt, atomic_read(&xprt->xpt_ref.refcount)); @@ -752,6 +750,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) dprintk("svc: got len=%d\n", len); } + if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { + dprintk("svc_recv: found XPT_CLOSE\n"); + svc_delete_xprt(xprt); + } + /* No data, incomplete (TCP) read, or accept() */ if (len == 0 || len == -EAGAIN) { rqstp->rq_res.len = 0; @@ -808,6 +811,7 @@ int svc_send(struct svc_rqst *rqstp) else len = xprt->xpt_ops->xpo_sendto(rqstp); mutex_unlock(&xprt->xpt_mutex); + rpc_wake_up(&xprt->xpt_bc_pending); svc_xprt_release(rqstp); if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) @@ -842,8 +846,8 @@ static void svc_age_temp_xprts(unsigned long closure) * through, close it. */ if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags)) continue; - if (atomic_read(&xprt->xpt_ref.refcount) > 1 - || test_bit(XPT_BUSY, &xprt->xpt_flags)) + if (atomic_read(&xprt->xpt_ref.refcount) > 1 || + test_bit(XPT_BUSY, &xprt->xpt_flags)) continue; svc_xprt_get(xprt); list_move(le, &to_be_aged); @@ -1166,11 +1170,6 @@ static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos) dprintk("svc_pool_stats_start, *pidx=%u\n", pidx); - lock_kernel(); - /* bump up the pseudo refcount while traversing */ - svc_get(serv); - unlock_kernel(); - if (!pidx) return SEQ_START_TOKEN; return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]); @@ -1198,12 +1197,6 @@ static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos) static void svc_pool_stats_stop(struct seq_file *m, void *p) { - struct svc_serv *serv = m->private; - - lock_kernel(); - /* this function really, really should have been called svc_put() */ - svc_destroy(serv); - unlock_kernel(); } static int svc_pool_stats_show(struct seq_file *m, void *p) diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index e64109b02ae..4e9393c2468 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -46,8 +46,8 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) dprintk("svc: svc_authenticate (%d)\n", flavor); spin_lock(&authtab_lock); - if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor]) - || !try_module_get(aops->owner)) { + if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor]) || + !try_module_get(aops->owner)) { spin_unlock(&authtab_lock); *authp = rpc_autherr_badcred; return SVC_DENIED; diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 5c865e2d299..4a8f6558718 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -125,8 +125,8 @@ static int ip_map_match(struct cache_head *corig, struct cache_head *cnew) { struct ip_map *orig = container_of(corig, struct ip_map, h); struct ip_map *new = container_of(cnew, struct ip_map, h); - return strcmp(orig->m_class, new->m_class) == 0 - && ipv6_addr_equal(&orig->m_addr, &new->m_addr); + return strcmp(orig->m_class, new->m_class) == 0 && + ipv6_addr_equal(&orig->m_addr, &new->m_addr); } static void ip_map_init(struct cache_head *cnew, struct cache_head *citem) { @@ -171,6 +171,11 @@ static void ip_map_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); +} + static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); @@ -289,7 +294,7 @@ struct cache_detail ip_map_cache = { .hash_table = ip_table, .name = "auth.unix.ip", .cache_put = ip_map_put, - .cache_request = ip_map_request, + .cache_upcall = ip_map_upcall, .cache_parse = ip_map_parse, .cache_show = ip_map_show, .match = ip_map_match, @@ -523,6 +528,11 @@ static void unix_gid_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); +} + static struct unix_gid *unix_gid_lookup(uid_t uid); extern struct cache_detail unix_gid_cache; @@ -622,7 +632,7 @@ struct cache_detail unix_gid_cache = { .hash_table = gid_table, .name = "auth.unix.gid", .cache_put = unix_gid_put, - .cache_request = unix_gid_request, + .cache_upcall = unix_gid_upcall, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, .match = unix_gid_match, @@ -658,6 +668,7 @@ static int unix_gid_find(uid_t uid, struct group_info **gip, case 0: *gip = ug->gi; get_group_info(*gip); + cache_put(&ug->h, &unix_gid_cache); return 0; default: return -EAGAIN; @@ -675,8 +686,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) case AF_INET: sin = svc_addr_in(rqstp); sin6 = &sin6_storage; - ipv6_addr_set(&sin6->sin6_addr, 0, 0, - htonl(0x0000FFFF), sin->sin_addr.s_addr); + ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &sin6->sin6_addr); break; case AF_INET6: sin6 = svc_addr_in6(rqstp); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 23128ee191a..870929e08e5 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -49,6 +49,7 @@ #include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/stats.h> +#include <linux/sunrpc/xprt.h> #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -110,7 +111,7 @@ static void svc_release_skb(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); - skb_free_datagram(svsk->sk_sk, skb); + skb_free_datagram_locked(svsk->sk_sk, skb); } } @@ -153,49 +154,27 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) } /* - * Generic sendto routine + * send routine intended to be shared by the fore- and back-channel */ -static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) +int svc_send_common(struct socket *sock, struct xdr_buf *xdr, + struct page *headpage, unsigned long headoffset, + struct page *tailpage, unsigned long tailoffset) { - struct svc_sock *svsk = - container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); - struct socket *sock = svsk->sk_sock; - int slen; - union { - struct cmsghdr hdr; - long all[SVC_PKTINFO_SPACE / sizeof(long)]; - } buffer; - struct cmsghdr *cmh = &buffer.hdr; - int len = 0; int result; int size; struct page **ppage = xdr->pages; size_t base = xdr->page_base; unsigned int pglen = xdr->page_len; unsigned int flags = MSG_MORE; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); + int slen; + int len = 0; slen = xdr->len; - if (rqstp->rq_prot == IPPROTO_UDP) { - struct msghdr msg = { - .msg_name = &rqstp->rq_addr, - .msg_namelen = rqstp->rq_addrlen, - .msg_control = cmh, - .msg_controllen = sizeof(buffer), - .msg_flags = MSG_MORE, - }; - - svc_set_cmsg_data(rqstp, cmh); - - if (sock_sendmsg(sock, &msg, 0) < 0) - goto out; - } - /* send head */ if (slen == xdr->head[0].iov_len) flags = 0; - len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, + len = kernel_sendpage(sock, headpage, headoffset, xdr->head[0].iov_len, flags); if (len != xdr->head[0].iov_len) goto out; @@ -219,16 +198,58 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) base = 0; ppage++; } + /* send tail */ if (xdr->tail[0].iov_len) { - result = kernel_sendpage(sock, rqstp->rq_respages[0], - ((unsigned long)xdr->tail[0].iov_base) - & (PAGE_SIZE-1), - xdr->tail[0].iov_len, 0); - + result = kernel_sendpage(sock, tailpage, tailoffset, + xdr->tail[0].iov_len, 0); if (result > 0) len += result; } + +out: + return len; +} + + +/* + * Generic sendto routine + */ +static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) +{ + struct svc_sock *svsk = + container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); + struct socket *sock = svsk->sk_sock; + union { + struct cmsghdr hdr; + long all[SVC_PKTINFO_SPACE / sizeof(long)]; + } buffer; + struct cmsghdr *cmh = &buffer.hdr; + int len = 0; + unsigned long tailoff; + unsigned long headoff; + RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); + + if (rqstp->rq_prot == IPPROTO_UDP) { + struct msghdr msg = { + .msg_name = &rqstp->rq_addr, + .msg_namelen = rqstp->rq_addrlen, + .msg_control = cmh, + .msg_controllen = sizeof(buffer), + .msg_flags = MSG_MORE, + }; + + svc_set_cmsg_data(rqstp, cmh); + + if (sock_sendmsg(sock, &msg, 0) < 0) + goto out; + } + + tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1); + headoff = 0; + len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff, + rqstp->rq_respages[0], tailoff); + out: dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", svsk, xdr->head[0].iov_base, xdr->head[0].iov_len, @@ -251,14 +272,14 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) case PF_INET: len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n", proto_name, - &inet_sk(sk)->rcv_saddr, - inet_sk(sk)->num); + &inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_num); break; case PF_INET6: len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", proto_name, &inet6_sk(sk)->rcv_saddr, - inet_sk(sk)->num); + inet_sk(sk)->inet_num); break; default: len = snprintf(buf, remaining, "*unknown-%d*\n", @@ -432,29 +453,49 @@ static void svc_tcp_write_space(struct sock *sk) } /* + * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo + */ +static int svc_udp_get_dest_address4(struct svc_rqst *rqstp, + struct cmsghdr *cmh) +{ + struct in_pktinfo *pki = CMSG_DATA(cmh); + if (cmh->cmsg_type != IP_PKTINFO) + return 0; + rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; + return 1; +} + +/* + * See net/ipv6/datagram.c : datagram_recv_ctl + */ +static int svc_udp_get_dest_address6(struct svc_rqst *rqstp, + struct cmsghdr *cmh) +{ + struct in6_pktinfo *pki = CMSG_DATA(cmh); + if (cmh->cmsg_type != IPV6_PKTINFO) + return 0; + ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr); + return 1; +} + +/* * Copy the UDP datagram's destination address to the rqstp structure. * The 'destination' address in this case is the address to which the * peer sent the datagram, i.e. our local address. For multihomed * hosts, this can change from msg to msg. Note that only the IP * address changes, the port number should remain the same. */ -static void svc_udp_get_dest_address(struct svc_rqst *rqstp, - struct cmsghdr *cmh) +static int svc_udp_get_dest_address(struct svc_rqst *rqstp, + struct cmsghdr *cmh) { - struct svc_sock *svsk = - container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); - switch (svsk->sk_sk->sk_family) { - case AF_INET: { - struct in_pktinfo *pki = CMSG_DATA(cmh); - rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; - break; - } - case AF_INET6: { - struct in6_pktinfo *pki = CMSG_DATA(cmh); - ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr); - break; - } + switch (cmh->cmsg_level) { + case SOL_IP: + return svc_udp_get_dest_address4(rqstp, cmh); + case SOL_IPV6: + return svc_udp_get_dest_address6(rqstp, cmh); } + + return 0; } /* @@ -531,16 +572,15 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_prot = IPPROTO_UDP; - if (cmh->cmsg_level != IPPROTO_IP || - cmh->cmsg_type != IP_PKTINFO) { + if (!svc_udp_get_dest_address(rqstp, cmh)) { if (net_ratelimit()) - printk("rpcsvc: received unknown control message:" - "%d/%d\n", - cmh->cmsg_level, cmh->cmsg_type); - skb_free_datagram(svsk->sk_sk, skb); + printk(KERN_WARNING + "svc: received unknown control message %d/%d; " + "dropping RPC reply datagram\n", + cmh->cmsg_level, cmh->cmsg_type); + skb_free_datagram_locked(svsk->sk_sk, skb); return 0; } - svc_udp_get_dest_address(rqstp, cmh); if (skb_is_nonlinear(skb)) { /* we have to copy */ @@ -548,18 +588,18 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) { local_bh_enable(); /* checksum error */ - skb_free_datagram(svsk->sk_sk, skb); + skb_free_datagram_locked(svsk->sk_sk, skb); return 0; } local_bh_enable(); - skb_free_datagram(svsk->sk_sk, skb); + skb_free_datagram_locked(svsk->sk_sk, skb); } else { /* we can use it in-place */ rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); rqstp->rq_arg.head[0].iov_len = len; if (skb_checksum_complete(skb)) { - skb_free_datagram(svsk->sk_sk, skb); + skb_free_datagram_locked(svsk->sk_sk, skb); return 0; } rqstp->rq_xprt_ctxt = skb; @@ -651,8 +691,7 @@ static struct svc_xprt_class svc_udp_class = { static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) { - int one = 1; - mm_segment_t oldfs; + int err, level, optname, one = 1; svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); @@ -671,12 +710,22 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); - oldfs = get_fs(); - set_fs(KERNEL_DS); /* make sure we get destination address info */ - svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO, - (char __user *)&one, sizeof(one)); - set_fs(oldfs); + switch (svsk->sk_sk->sk_family) { + case AF_INET: + level = SOL_IP; + optname = IP_PKTINFO; + break; + case AF_INET6: + level = SOL_IPV6; + optname = IPV6_RECVPKTINFO; + break; + default: + BUG(); + } + err = kernel_setsockopt(svsk->sk_sock, level, optname, + (char *)&one, sizeof(one)); + dprintk("svc: kernel_setsockopt returned %d\n", err); } /* @@ -826,21 +875,15 @@ failed: } /* - * Receive data from a TCP socket. + * Receive data. + * If we haven't gotten the record length yet, get the next four bytes. + * Otherwise try to gobble up as much as possible up to the complete + * record length. */ -static int svc_tcp_recvfrom(struct svc_rqst *rqstp) +static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) { - struct svc_sock *svsk = - container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); struct svc_serv *serv = svsk->sk_xprt.xpt_server; - int len; - struct kvec *vec; - int pnum, vlen; - - dprintk("svc: tcp_recv %p data %d conn %d close %d\n", - svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); + int len; if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) /* sndbuf needs to have room for one request @@ -861,10 +904,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - /* Receive data. If we haven't got the record length yet, get - * the next four bytes. Otherwise try to gobble up as much as - * possible up to the complete record length. - */ if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; struct kvec iov; @@ -879,7 +918,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) dprintk("svc: short recvfrom while reading record " "length (%d of %d)\n", len, want); svc_xprt_received(&svsk->sk_xprt); - return -EAGAIN; /* record header not complete */ + goto err_again; /* record header not complete */ } svsk->sk_reclen = ntohl(svsk->sk_reclen); @@ -894,6 +933,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) "per record not supported\n"); goto err_delete; } + svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); if (svsk->sk_reclen > serv->sv_max_mesg) { @@ -914,17 +954,121 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) dprintk("svc: incomplete TCP record (%d of %d)\n", len, svsk->sk_reclen); svc_xprt_received(&svsk->sk_xprt); - return -EAGAIN; /* record not complete */ + goto err_again; /* record not complete */ } len = svsk->sk_reclen; set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + return len; + error: + if (len == -EAGAIN) { + dprintk("RPC: TCP recv_record got EAGAIN\n"); + svc_xprt_received(&svsk->sk_xprt); + } + return len; + err_delete: + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); + err_again: + return -EAGAIN; +} + +static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, + struct rpc_rqst **reqpp, struct kvec *vec) +{ + struct rpc_rqst *req = NULL; + u32 *p; + u32 xid; + u32 calldir; + int len; + + len = svc_recvfrom(rqstp, vec, 1, 8); + if (len < 0) + goto error; + + p = (u32 *)rqstp->rq_arg.head[0].iov_base; + xid = *p++; + calldir = *p; + + if (calldir == 0) { + /* REQUEST is the most common case */ + vec[0] = rqstp->rq_arg.head[0]; + } else { + /* REPLY */ + if (svsk->sk_bc_xprt) + req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid); + + if (!req) { + printk(KERN_NOTICE + "%s: Got unrecognized reply: " + "calldir 0x%x sk_bc_xprt %p xid %08x\n", + __func__, ntohl(calldir), + svsk->sk_bc_xprt, xid); + vec[0] = rqstp->rq_arg.head[0]; + goto out; + } + + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, + sizeof(struct xdr_buf)); + /* copy the xid and call direction */ + memcpy(req->rq_private_buf.head[0].iov_base, + rqstp->rq_arg.head[0].iov_base, 8); + vec[0] = req->rq_private_buf.head[0]; + } + out: + vec[0].iov_base += 8; + vec[0].iov_len -= 8; + len = svsk->sk_reclen - 8; + error: + *reqpp = req; + return len; +} + +/* + * Receive data from a TCP socket. + */ +static int svc_tcp_recvfrom(struct svc_rqst *rqstp) +{ + struct svc_sock *svsk = + container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); + struct svc_serv *serv = svsk->sk_xprt.xpt_server; + int len; + struct kvec *vec; + int pnum, vlen; + struct rpc_rqst *req = NULL; + + dprintk("svc: tcp_recv %p data %d conn %d close %d\n", + svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), + test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), + test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); + + len = svc_tcp_recv_record(svsk, rqstp); + if (len < 0) + goto error; + vec = rqstp->rq_vec; vec[0] = rqstp->rq_arg.head[0]; vlen = PAGE_SIZE; + + /* + * We have enough data for the whole tcp record. Let's try and read the + * first 8 bytes to get the xid and the call direction. We can use this + * to figure out if this is a call or a reply to a callback. If + * sk_reclen is < 8 (xid and calldir), then this is a malformed packet. + * In that case, don't bother with the calldir and just read the data. + * It will be rejected in svc_process. + */ + if (len >= 8) { + len = svc_process_calldir(svsk, rqstp, &req, vec); + if (len < 0) + goto err_again; + vlen -= 8; + } + pnum = 1; while (vlen < len) { - vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]); + vec[pnum].iov_base = (req) ? + page_address(req->rq_private_buf.pages[pnum - 1]) : + page_address(rqstp->rq_pages[pnum]); vec[pnum].iov_len = PAGE_SIZE; pnum++; vlen += PAGE_SIZE; @@ -934,8 +1078,18 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) /* Now receive data */ len = svc_recvfrom(rqstp, vec, pnum, len); if (len < 0) - goto error; + goto err_again; + /* + * Account for the 8 bytes we read earlier + */ + len += 8; + + if (req) { + xprt_complete_rqst(req->rq_task, len); + len = 0; + goto out; + } dprintk("svc: TCP complete record (%d bytes)\n", len); rqstp->rq_arg.len = len; rqstp->rq_arg.page_base = 0; @@ -949,6 +1103,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; +out: /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; @@ -960,21 +1115,19 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) return len; - err_delete: - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - return -EAGAIN; - - error: +err_again: if (len == -EAGAIN) { dprintk("RPC: TCP recvfrom got EAGAIN\n"); svc_xprt_received(&svsk->sk_xprt); - } else { + return len; + } +error: + if (len != -EAGAIN) { printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", svsk->sk_xprt.xpt_server->sv_name, -len); - goto err_delete; + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); } - - return len; + return -EAGAIN; } /* @@ -1158,7 +1311,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Register socket with portmapper */ if (*errp >= 0 && pmap_register) *errp = svc_register(serv, inet->sk_family, inet->sk_protocol, - ntohs(inet_sk(inet)->sport)); + ntohs(inet_sk(inet)->inet_sport)); if (*errp < 0) { kfree(svsk); diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 5231f7aaac0..e65dcc61333 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -56,7 +56,7 @@ rpc_unregister_sysctl(void) } } -static int proc_do_xprt(ctl_table *table, int write, struct file *file, +static int proc_do_xprt(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[256]; @@ -71,7 +71,7 @@ static int proc_do_xprt(ctl_table *table, int write, struct file *file, } static int -proc_dodebug(ctl_table *table, int write, struct file *file, +proc_dodebug(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[20], c, *s; @@ -139,46 +139,45 @@ static ctl_table debug_table[] = { .data = &rpc_debug, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dodebug + .proc_handler = proc_dodebug }, { .procname = "nfs_debug", .data = &nfs_debug, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dodebug + .proc_handler = proc_dodebug }, { .procname = "nfsd_debug", .data = &nfsd_debug, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dodebug + .proc_handler = proc_dodebug }, { .procname = "nlm_debug", .data = &nlm_debug, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dodebug + .proc_handler = proc_dodebug }, { .procname = "transports", .maxlen = 256, .mode = 0444, - .proc_handler = &proc_do_xprt, + .proc_handler = proc_do_xprt, }, - { .ctl_name = 0 } + { } }; static ctl_table sunrpc_table[] = { { - .ctl_name = CTL_SUNRPC, .procname = "sunrpc", .mode = 0555, .child = debug_table }, - { .ctl_name = 0 } + { } }; #endif diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c index 31becbf0926..dd824341c34 100644 --- a/net/sunrpc/timer.c +++ b/net/sunrpc/timer.c @@ -25,8 +25,13 @@ #define RPC_RTO_INIT (HZ/5) #define RPC_RTO_MIN (HZ/10) -void -rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) +/** + * rpc_init_rtt - Initialize an RPC RTT estimator context + * @rt: context to initialize + * @timeo: initial timeout value, in jiffies + * + */ +void rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) { unsigned long init = 0; unsigned i; @@ -43,12 +48,16 @@ rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) } EXPORT_SYMBOL_GPL(rpc_init_rtt); -/* +/** + * rpc_update_rtt - Update an RPC RTT estimator context + * @rt: context to update + * @timer: timer array index (request type) + * @m: recent actual RTT, in jiffies + * * NB: When computing the smoothed RTT and standard deviation, * be careful not to produce negative intermediate results. */ -void -rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) +void rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) { long *srtt, *sdrtt; @@ -79,21 +88,25 @@ rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) } EXPORT_SYMBOL_GPL(rpc_update_rtt); -/* - * Estimate rto for an nfs rpc sent via. an unreliable datagram. - * Use the mean and mean deviation of rtt for the appropriate type of rpc - * for the frequent rpcs and a default for the others. - * The justification for doing "other" this way is that these rpcs - * happen so infrequently that timer est. would probably be stale. - * Also, since many of these rpcs are - * non-idempotent, a conservative timeout is desired. +/** + * rpc_calc_rto - Provide an estimated timeout value + * @rt: context to use for calculation + * @timer: timer array index (request type) + * + * Estimate RTO for an NFS RPC sent via an unreliable datagram. Use + * the mean and mean deviation of RTT for the appropriate type of RPC + * for frequently issued RPCs, and a fixed default for the others. + * + * The justification for doing "other" this way is that these RPCs + * happen so infrequently that timer estimation would probably be + * stale. Also, since many of these RPCs are non-idempotent, a + * conservative timeout is desired. + * * getattr, lookup, * read, write, commit - A+4D * other - timeo */ - -unsigned long -rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) +unsigned long rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) { unsigned long res; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 406e26de584..8bd690c48b6 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -24,7 +24,7 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj) unsigned int quadlen = XDR_QUADLEN(obj->len); p[quadlen] = 0; /* zero trailing bytes */ - *p++ = htonl(obj->len); + *p++ = cpu_to_be32(obj->len); memcpy(p, obj->data, obj->len); return p + XDR_QUADLEN(obj->len); } @@ -35,7 +35,7 @@ xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj) { unsigned int len; - if ((len = ntohl(*p++)) > XDR_MAX_NETOBJ) + if ((len = be32_to_cpu(*p++)) > XDR_MAX_NETOBJ) return NULL; obj->len = len; obj->data = (u8 *) p; @@ -83,7 +83,7 @@ EXPORT_SYMBOL_GPL(xdr_encode_opaque_fixed); */ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes) { - *p++ = htonl(nbytes); + *p++ = cpu_to_be32(nbytes); return xdr_encode_opaque_fixed(p, ptr, nbytes); } EXPORT_SYMBOL_GPL(xdr_encode_opaque); @@ -101,7 +101,7 @@ xdr_decode_string_inplace(__be32 *p, char **sp, { u32 len; - len = ntohl(*p++); + len = be32_to_cpu(*p++); if (len > maxlen) return NULL; *lenp = len; @@ -771,7 +771,7 @@ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj) status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); if (status) return status; - *obj = ntohl(raw); + *obj = be32_to_cpu(raw); return 0; } EXPORT_SYMBOL_GPL(xdr_decode_word); @@ -779,7 +779,7 @@ EXPORT_SYMBOL_GPL(xdr_decode_word); int xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) { - __be32 raw = htonl(obj); + __be32 raw = cpu_to_be32(obj); return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index f412a852bc7..fd46d42afa8 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -832,6 +832,11 @@ static void xprt_timer(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } +static inline int xprt_has_timer(struct rpc_xprt *xprt) +{ + return xprt->idle_timeout != 0; +} + /** * xprt_prepare_transmit - reserve the transport before sending a request * @task: RPC task about to send a request @@ -1013,7 +1018,7 @@ void xprt_release(struct rpc_task *task) if (!list_empty(&req->rq_list)) list_del(&req->rq_list); xprt->last_used = jiffies; - if (list_empty(&xprt->recv)) + if (list_empty(&xprt->recv) && xprt_has_timer(xprt)) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); @@ -1082,8 +1087,11 @@ found: #endif /* CONFIG_NFS_V4_1 */ INIT_WORK(&xprt->task_cleanup, xprt_autoclose); - setup_timer(&xprt->timer, xprt_init_autodisconnect, - (unsigned long)xprt); + if (xprt_has_timer(xprt)) + setup_timer(&xprt->timer, xprt_init_autodisconnect, + (unsigned long)xprt); + else + init_timer(&xprt->timer); xprt->last_used = jiffies; xprt->cwnd = RPC_INITCWND; xprt->bind_index = 0; @@ -1102,7 +1110,6 @@ found: dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); - return xprt; } diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 87101177825..5b8a8ff93a2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -80,7 +80,7 @@ struct kmem_cache *svc_rdma_ctxt_cachep; * current value. */ static int read_reset_stat(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { atomic_t *stat = (atomic_t *)table->data; @@ -120,8 +120,7 @@ static ctl_table svcrdma_parm_table[] = { .data = &svcrdma_max_requests, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_max_requests, .extra2 = &max_max_requests }, @@ -130,8 +129,7 @@ static ctl_table svcrdma_parm_table[] = { .data = &svcrdma_max_req_size, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_max_inline, .extra2 = &max_max_inline }, @@ -140,8 +138,7 @@ static ctl_table svcrdma_parm_table[] = { .data = &svcrdma_ord, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_ord, .extra2 = &max_ord, }, @@ -151,67 +148,65 @@ static ctl_table svcrdma_parm_table[] = { .data = &rdma_stat_read, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_recv", .data = &rdma_stat_recv, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_write", .data = &rdma_stat_write, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_sq_starve", .data = &rdma_stat_sq_starve, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_rq_starve", .data = &rdma_stat_rq_starve, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_rq_poll", .data = &rdma_stat_rq_poll, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_rq_prod", .data = &rdma_stat_rq_prod, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_sq_poll", .data = &rdma_stat_sq_poll, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_sq_prod", .data = &rdma_stat_sq_prod, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, - }, - { - .ctl_name = 0, + .proc_handler = read_reset_stat, }, + { }, }; static ctl_table svcrdma_table[] = { @@ -220,21 +215,16 @@ static ctl_table svcrdma_table[] = { .mode = 0555, .child = svcrdma_parm_table }, - { - .ctl_name = 0, - }, + { }, }; static ctl_table svcrdma_root_table[] = { { - .ctl_name = CTL_SUNRPC, .procname = "sunrpc", .mode = 0555, .child = svcrdma_table }, - { - .ctl_name = 0, - }, + { }, }; void svc_rdma_cleanup(void) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 9e884383134..f92e37eb413 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -337,10 +337,9 @@ static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) { - if ((RDMA_TRANSPORT_IWARP == - rdma_node_get_transport(xprt->sc_cm_id-> - device->node_type)) - && sge_count > 1) + if ((rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == + RDMA_TRANSPORT_IWARP) && + sge_count > 1) return 1; else return min_t(int, sge_count, xprt->sc_max_sge); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 5151f9f6c57..3fa5751af0e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -42,6 +42,7 @@ #include <linux/sunrpc/svc_xprt.h> #include <linux/sunrpc/debug.h> #include <linux/sunrpc/rpc_rdma.h> +#include <linux/sched.h> #include <linux/spinlock.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> @@ -730,12 +731,12 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) goto err; mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES); - if (!mr) + if (IS_ERR(mr)) goto err_free_frmr; pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, RPCSVC_MAXPAGES); - if (!pl) + if (IS_ERR(pl)) goto err_free_mr; frmr->mr = mr; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 1dd6123070e..7018eef1dcd 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -86,79 +86,63 @@ static struct ctl_table_header *sunrpc_table_header; static ctl_table xr_tunables_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_slot_table_entries", .data = &xprt_rdma_slot_table_entries, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_slot_table_size, .extra2 = &max_slot_table_size }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_max_inline_read", .data = &xprt_rdma_max_inline_read, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_max_inline_write", .data = &xprt_rdma_max_inline_write, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_inline_write_padding", .data = &xprt_rdma_inline_write_padding, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &max_padding, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_memreg_strategy", .data = &xprt_rdma_memreg_strategy, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_memreg, .extra2 = &max_memreg, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_pad_optimize", .data = &xprt_rdma_pad_optimize, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = 0, + .proc_handler = proc_dointvec, }, + { }, }; static ctl_table sunrpc_table[] = { { - .ctl_name = CTL_SUNRPC, .procname = "sunrpc", .mode = 0555, .child = xr_tunables_table }, - { - .ctl_name = 0, - }, + { }, }; #endif @@ -168,47 +152,25 @@ static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ static void xprt_rdma_format_addresses(struct rpc_xprt *xprt) { - struct sockaddr_in *addr = (struct sockaddr_in *) + struct sockaddr *sap = (struct sockaddr *) &rpcx_to_rdmad(xprt).addr; - char *buf; + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + char buf[64]; - buf = kzalloc(20, GFP_KERNEL); - if (buf) - snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr); - xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - buf = kzalloc(8, GFP_KERNEL); - if (buf) - snprintf(buf, 8, "%u", ntohs(addr->sin_port)); - xprt->address_strings[RPC_DISPLAY_PORT] = buf; + (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - buf = kzalloc(48, GFP_KERNEL); - if (buf) - snprintf(buf, 48, "addr=%pI4 port=%u proto=%s", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port), "rdma"); - xprt->address_strings[RPC_DISPLAY_ALL] = buf; - - buf = kzalloc(10, GFP_KERNEL); - if (buf) - snprintf(buf, 10, "%02x%02x%02x%02x", - NIPQUAD(addr->sin_addr.s_addr)); - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) - snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - - buf = kzalloc(30, GFP_KERNEL); - if (buf) - snprintf(buf, 30, "%pI4.%u.%u", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port) >> 8, - ntohs(addr->sin_port) & 0xff); - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", + NIPQUAD(sin->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); + + (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); /* netid */ xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 465aafc2007..2209aa87d89 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -878,8 +878,8 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { * others indicate a transport condition which has already * undergone a best-effort. */ - if (ep->rep_connected == -ECONNREFUSED - && ++retry_count <= RDMA_CONNECT_RETRY_MAX) { + if (ep->rep_connected == -ECONNREFUSED && + ++retry_count <= RDMA_CONNECT_RETRY_MAX) { dprintk("RPC: %s: non-peer_reject, retry\n", __func__); goto retry; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 83c73c4d017..04732d09013 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -32,6 +32,7 @@ #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/sched.h> +#include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/xprtsock.h> #include <linux/file.h> #ifdef CONFIG_NFS_V4_1 @@ -43,6 +44,7 @@ #include <net/udp.h> #include <net/tcp.h> +#include "sunrpc.h" /* * xprtsock tunables */ @@ -79,46 +81,38 @@ static struct ctl_table_header *sunrpc_table_header; */ static ctl_table xs_tunables_table[] = { { - .ctl_name = CTL_SLOTTABLE_UDP, .procname = "udp_slot_table_entries", .data = &xprt_udp_slot_table_entries, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_slot_table_size, .extra2 = &max_slot_table_size }, { - .ctl_name = CTL_SLOTTABLE_TCP, .procname = "tcp_slot_table_entries", .data = &xprt_tcp_slot_table_entries, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_slot_table_size, .extra2 = &max_slot_table_size }, { - .ctl_name = CTL_MIN_RESVPORT, .procname = "min_resvport", .data = &xprt_min_resvport, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &xprt_min_resvport_limit, .extra2 = &xprt_max_resvport_limit }, { - .ctl_name = CTL_MAX_RESVPORT, .procname = "max_resvport", .data = &xprt_max_resvport, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &xprt_min_resvport_limit, .extra2 = &xprt_max_resvport_limit }, @@ -127,24 +121,18 @@ static ctl_table xs_tunables_table[] = { .data = &xs_tcp_fin_timeout, .maxlen = sizeof(xs_tcp_fin_timeout), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = sysctl_jiffies - }, - { - .ctl_name = 0, + .proc_handler = proc_dointvec_jiffies, }, + { }, }; static ctl_table sunrpc_table[] = { { - .ctl_name = CTL_SUNRPC, .procname = "sunrpc", .mode = 0555, .child = xs_tunables_table }, - { - .ctl_name = 0, - }, + { }, }; #endif @@ -248,8 +236,8 @@ struct sock_xprt { * Connection of transports */ struct delayed_work connect_worker; - struct sockaddr_storage addr; - unsigned short port; + struct sockaddr_storage srcaddr; + unsigned short srcport; /* * UDP socket buffer size parameters @@ -296,117 +284,60 @@ static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) return (struct sockaddr_in6 *) &xprt->addr; } -static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt, - const char *protocol, - const char *netid) +static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) { - struct sockaddr_in *addr = xs_addr_in(xprt); - char *buf; + struct sockaddr *sap = xs_addr(xprt); + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + char buf[128]; - buf = kzalloc(20, GFP_KERNEL); - if (buf) { - snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr); - } - xprt->address_strings[RPC_DISPLAY_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%u", - ntohs(addr->sin_port)); - } - xprt->address_strings[RPC_DISPLAY_PORT] = buf; - - xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; - - buf = kzalloc(48, GFP_KERNEL); - if (buf) { - snprintf(buf, 48, "addr=%pI4 port=%u proto=%s", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port), - protocol); - } - xprt->address_strings[RPC_DISPLAY_ALL] = buf; - - buf = kzalloc(10, GFP_KERNEL); - if (buf) { - snprintf(buf, 10, "%02x%02x%02x%02x", - NIPQUAD(addr->sin_addr.s_addr)); - } - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%4hx", - ntohs(addr->sin_port)); - } - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - - buf = kzalloc(30, GFP_KERNEL); - if (buf) { - snprintf(buf, 30, "%pI4.%u.%u", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port) >> 8, - ntohs(addr->sin_port) & 0xff); + switch (sap->sa_family) { + case AF_INET: + sin = xs_addr_in(xprt); + (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", + NIPQUAD(sin->sin_addr.s_addr)); + break; + case AF_INET6: + sin6 = xs_addr_in6(xprt); + (void)snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); + break; + default: + BUG(); } - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - - xprt->address_strings[RPC_DISPLAY_NETID] = netid; + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); } -static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, - const char *protocol, - const char *netid) +static void xs_format_common_peer_ports(struct rpc_xprt *xprt) { - struct sockaddr_in6 *addr = xs_addr_in6(xprt); - char *buf; + struct sockaddr *sap = xs_addr(xprt); + char buf[128]; - buf = kzalloc(40, GFP_KERNEL); - if (buf) { - snprintf(buf, 40, "%pI6",&addr->sin6_addr); - } - xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%u", - ntohs(addr->sin6_port)); - } - xprt->address_strings[RPC_DISPLAY_PORT] = buf; + (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); +} +static void xs_format_peer_addresses(struct rpc_xprt *xprt, + const char *protocol, + const char *netid) +{ xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; + xprt->address_strings[RPC_DISPLAY_NETID] = netid; + xs_format_common_peer_addresses(xprt); + xs_format_common_peer_ports(xprt); +} - buf = kzalloc(64, GFP_KERNEL); - if (buf) { - snprintf(buf, 64, "addr=%pI6 port=%u proto=%s", - &addr->sin6_addr, - ntohs(addr->sin6_port), - protocol); - } - xprt->address_strings[RPC_DISPLAY_ALL] = buf; - - buf = kzalloc(36, GFP_KERNEL); - if (buf) - snprintf(buf, 36, "%pi6", &addr->sin6_addr); - - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%4hx", - ntohs(addr->sin6_port)); - } - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - - buf = kzalloc(50, GFP_KERNEL); - if (buf) { - snprintf(buf, 50, "%pI6.%u.%u", - &addr->sin6_addr, - ntohs(addr->sin6_port) >> 8, - ntohs(addr->sin6_port) & 0xff); - } - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; +static void xs_update_peer_port(struct rpc_xprt *xprt) +{ + kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); - xprt->address_strings[RPC_DISPLAY_NETID] = netid; + xs_format_common_peer_ports(xprt); } static void xs_free_peer_addresses(struct rpc_xprt *xprt) @@ -828,6 +759,7 @@ static void xs_close(struct rpc_xprt *xprt) dprintk("RPC: xs_close xprt %p\n", xprt); xs_reset_transport(transport); + xprt->reestablish_timeout = 0; smp_mb__before_clear_bit(); clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); @@ -1319,6 +1251,12 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) if (xprt->shutdown) goto out; + /* Any data means we had a useful conversation, so + * the we don't need to delay the next reconnect + */ + if (xprt->reestablish_timeout) + xprt->reestablish_timeout = 0; + /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ rd_desc.arg.data = xprt; do { @@ -1587,25 +1525,15 @@ static unsigned short xs_get_random_port(void) */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { - struct sockaddr *addr = xs_addr(xprt); - dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - switch (addr->sa_family) { - case AF_INET: - ((struct sockaddr_in *)addr)->sin_port = htons(port); - break; - case AF_INET6: - ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); - break; - default: - BUG(); - } + rpc_set_port(xs_addr(xprt), port); + xs_update_peer_port(xprt); } static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock) { - unsigned short port = transport->port; + unsigned short port = transport->srcport; if (port == 0 && transport->xprt.resvport) port = xs_get_random_port(); @@ -1614,8 +1542,8 @@ static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port) { - if (transport->port != 0) - transport->port = 0; + if (transport->srcport != 0) + transport->srcport = 0; if (!transport->xprt.resvport) return 0; if (port <= xprt_min_resvport || port > xprt_max_resvport) @@ -1633,7 +1561,7 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) unsigned short port = xs_get_srcport(transport, sock); unsigned short last; - sa = (struct sockaddr_in *)&transport->addr; + sa = (struct sockaddr_in *)&transport->srcaddr; myaddr.sin_addr = sa->sin_addr; do { myaddr.sin_port = htons(port); @@ -1642,7 +1570,7 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) if (port == 0) break; if (err == 0) { - transport->port = port; + transport->srcport = port; break; } last = port; @@ -1666,7 +1594,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) unsigned short port = xs_get_srcport(transport, sock); unsigned short last; - sa = (struct sockaddr_in6 *)&transport->addr; + sa = (struct sockaddr_in6 *)&transport->srcaddr; myaddr.sin6_addr = sa->sin6_addr; do { myaddr.sin6_port = htons(port); @@ -1675,7 +1603,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) if (port == 0) break; if (err == 0) { - transport->port = port; + transport->srcport = port; break; } last = port; @@ -1780,8 +1708,11 @@ static void xs_udp_connect_worker4(struct work_struct *work) goto out; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT]); xs_udp_finish_connecting(xprt, sock); status = 0; @@ -1822,8 +1753,11 @@ static void xs_udp_connect_worker6(struct work_struct *work) goto out; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT]); xs_udp_finish_connecting(xprt, sock); status = 0; @@ -1948,8 +1882,11 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, goto out_eagain; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT]); status = xs_tcp_finish_connecting(xprt, sock); dprintk("RPC: %p connect status %d connected %d sock state %d\n", @@ -2090,6 +2027,8 @@ static void xs_connect(struct rpc_task *task) &transport->connect_worker, xprt->reestablish_timeout); xprt->reestablish_timeout <<= 1; + if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; } else { @@ -2120,7 +2059,7 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n", - transport->port, + transport->srcport, xprt->stat.bind_count, xprt->stat.sends, xprt->stat.recvs, @@ -2144,7 +2083,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) idle_time = (long)(jiffies - xprt->last_used) / HZ; seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n", - transport->port, + transport->srcport, xprt->stat.bind_count, xprt->stat.connect_count, xprt->stat.connect_time, @@ -2156,6 +2095,134 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) xprt->stat.bklog_u); } +/* + * Allocate a bunch of pages for a scratch buffer for the rpc code. The reason + * we allocate pages instead doing a kmalloc like rpc_malloc is because we want + * to use the server side send routines. + */ +void *bc_malloc(struct rpc_task *task, size_t size) +{ + struct page *page; + struct rpc_buffer *buf; + + BUG_ON(size > PAGE_SIZE - sizeof(struct rpc_buffer)); + page = alloc_page(GFP_KERNEL); + + if (!page) + return NULL; + + buf = page_address(page); + buf->len = PAGE_SIZE; + + return buf->data; +} + +/* + * Free the space allocated in the bc_alloc routine + */ +void bc_free(void *buffer) +{ + struct rpc_buffer *buf; + + if (!buffer) + return; + + buf = container_of(buffer, struct rpc_buffer, data); + free_page((unsigned long)buf); +} + +/* + * Use the svc_sock to send the callback. Must be called with svsk->sk_mutex + * held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request. + */ +static int bc_sendto(struct rpc_rqst *req) +{ + int len; + struct xdr_buf *xbufp = &req->rq_snd_buf; + struct rpc_xprt *xprt = req->rq_xprt; + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct socket *sock = transport->sock; + unsigned long headoff; + unsigned long tailoff; + + /* + * Set up the rpc header and record marker stuff + */ + xs_encode_tcp_record_marker(xbufp); + + tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; + headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; + len = svc_send_common(sock, xbufp, + virt_to_page(xbufp->head[0].iov_base), headoff, + xbufp->tail[0].iov_base, tailoff); + + if (len != xbufp->len) { + printk(KERN_NOTICE "Error sending entire callback!\n"); + len = -EAGAIN; + } + + return len; +} + +/* + * The send routine. Borrows from svc_send + */ +static int bc_send_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct svc_xprt *xprt; + struct svc_sock *svsk; + u32 len; + + dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid)); + /* + * Get the server socket associated with this callback xprt + */ + xprt = req->rq_xprt->bc_xprt; + svsk = container_of(xprt, struct svc_sock, sk_xprt); + + /* + * Grab the mutex to serialize data as the connection is shared + * with the fore channel + */ + if (!mutex_trylock(&xprt->xpt_mutex)) { + rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL); + if (!mutex_trylock(&xprt->xpt_mutex)) + return -EAGAIN; + rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task); + } + if (test_bit(XPT_DEAD, &xprt->xpt_flags)) + len = -ENOTCONN; + else + len = bc_sendto(req); + mutex_unlock(&xprt->xpt_mutex); + + if (len > 0) + len = 0; + + return len; +} + +/* + * The close routine. Since this is client initiated, we do nothing + */ + +static void bc_close(struct rpc_xprt *xprt) +{ + return; +} + +/* + * The xprt destroy routine. Again, because this connection is client + * initiated, we do nothing + */ + +static void bc_destroy(struct rpc_xprt *xprt) +{ + return; +} + static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, @@ -2192,6 +2259,22 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, }; +/* + * The rpc_xprt_ops for the server backchannel + */ + +static struct rpc_xprt_ops bc_tcp_ops = { + .reserve_xprt = xprt_reserve_xprt, + .release_xprt = xprt_release_xprt, + .buf_alloc = bc_malloc, + .buf_free = bc_free, + .send_request = bc_send_request, + .set_retrans_timeout = xprt_set_retrans_timeout_def, + .close = bc_close, + .destroy = bc_destroy, + .print_stats = xs_tcp_print_stats, +}; + static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, unsigned int slot_table_size) { @@ -2223,7 +2306,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, memcpy(&xprt->addr, args->dstaddr, args->addrlen); xprt->addrlen = args->addrlen; if (args->srcaddr) - memcpy(&new->addr, args->srcaddr, args->addrlen); + memcpy(&new->srcaddr, args->srcaddr, args->addrlen); return xprt; } @@ -2272,7 +2355,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker4); - xs_format_ipv4_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); + xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) @@ -2280,15 +2363,22 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker6); - xs_format_ipv6_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); + xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); break; default: kfree(xprt); return ERR_PTR(-EAFNOSUPPORT); } - dprintk("RPC: set up transport to address %s\n", - xprt->address_strings[RPC_DISPLAY_ALL]); + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); if (try_module_get(THIS_MODULE)) return xprt; @@ -2337,23 +2427,33 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) if (((struct sockaddr_in *)addr)->sin_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); - xs_format_ipv4_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); + INIT_DELAYED_WORK(&transport->connect_worker, + xs_tcp_connect_worker4); + xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); - xs_format_ipv6_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); + INIT_DELAYED_WORK(&transport->connect_worker, + xs_tcp_connect_worker6); + xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); break; default: kfree(xprt); return ERR_PTR(-EAFNOSUPPORT); } - dprintk("RPC: set up transport to address %s\n", - xprt->address_strings[RPC_DISPLAY_ALL]); + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); + if (try_module_get(THIS_MODULE)) return xprt; @@ -2363,11 +2463,93 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) return ERR_PTR(-EINVAL); } +/** + * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket + * @args: rpc transport creation arguments + * + */ +static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) +{ + struct sockaddr *addr = args->dstaddr; + struct rpc_xprt *xprt; + struct sock_xprt *transport; + struct svc_sock *bc_sock; + + if (!args->bc_xprt) + ERR_PTR(-EINVAL); + + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + if (IS_ERR(xprt)) + return xprt; + transport = container_of(xprt, struct sock_xprt, xprt); + + xprt->prot = IPPROTO_TCP; + xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; + xprt->timeout = &xs_tcp_default_timeout; + + /* backchannel */ + xprt_set_bound(xprt); + xprt->bind_timeout = 0; + xprt->connect_timeout = 0; + xprt->reestablish_timeout = 0; + xprt->idle_timeout = 0; + + /* + * The backchannel uses the same socket connection as the + * forechannel + */ + xprt->bc_xprt = args->bc_xprt; + bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); + bc_sock->sk_bc_xprt = xprt; + transport->sock = bc_sock->sk_sock; + transport->inet = bc_sock->sk_sk; + + xprt->ops = &bc_tcp_ops; + + switch (addr->sa_family) { + case AF_INET: + xs_format_peer_addresses(xprt, "tcp", + RPCBIND_NETID_TCP); + break; + case AF_INET6: + xs_format_peer_addresses(xprt, "tcp", + RPCBIND_NETID_TCP6); + break; + default: + kfree(xprt); + return ERR_PTR(-EAFNOSUPPORT); + } + + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); + + /* + * Since we don't want connections for the backchannel, we set + * the xprt status to connected + */ + xprt_set_connected(xprt); + + + if (try_module_get(THIS_MODULE)) + return xprt; + kfree(xprt->slot); + kfree(xprt); + return ERR_PTR(-EINVAL); +} + static struct xprt_class xs_udp_transport = { .list = LIST_HEAD_INIT(xs_udp_transport.list), .name = "udp", .owner = THIS_MODULE, - .ident = IPPROTO_UDP, + .ident = XPRT_TRANSPORT_UDP, .setup = xs_setup_udp, }; @@ -2375,10 +2557,18 @@ static struct xprt_class xs_tcp_transport = { .list = LIST_HEAD_INIT(xs_tcp_transport.list), .name = "tcp", .owner = THIS_MODULE, - .ident = IPPROTO_TCP, + .ident = XPRT_TRANSPORT_TCP, .setup = xs_setup_tcp, }; +static struct xprt_class xs_bc_tcp_transport = { + .list = LIST_HEAD_INIT(xs_bc_tcp_transport.list), + .name = "tcp NFSv4.1 backchannel", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_BC_TCP, + .setup = xs_setup_bc_tcp, +}; + /** * init_socket_xprt - set up xprtsock's sysctls, register with RPC client * @@ -2392,6 +2582,7 @@ int init_socket_xprt(void) xprt_register_transport(&xs_udp_transport); xprt_register_transport(&xs_tcp_transport); + xprt_register_transport(&xs_bc_tcp_transport); return 0; } @@ -2411,4 +2602,57 @@ void cleanup_socket_xprt(void) xprt_unregister_transport(&xs_udp_transport); xprt_unregister_transport(&xs_tcp_transport); + xprt_unregister_transport(&xs_bc_tcp_transport); +} + +static int param_set_uint_minmax(const char *val, struct kernel_param *kp, + unsigned int min, unsigned int max) +{ + unsigned long num; + int ret; + + if (!val) + return -EINVAL; + ret = strict_strtoul(val, 0, &num); + if (ret == -EINVAL || num < min || num > max) + return -EINVAL; + *((unsigned int *)kp->arg) = num; + return 0; +} + +static int param_set_portnr(const char *val, struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, + RPC_MIN_RESVPORT, + RPC_MAX_RESVPORT); } + +static int param_get_portnr(char *buffer, struct kernel_param *kp) +{ + return param_get_uint(buffer, kp); +} +#define param_check_portnr(name, p) \ + __param_check(name, p, unsigned int); + +module_param_named(min_resvport, xprt_min_resvport, portnr, 0644); +module_param_named(max_resvport, xprt_max_resvport, portnr, 0644); + +static int param_set_slot_table_size(const char *val, struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, + RPC_MIN_SLOT_TABLE, + RPC_MAX_SLOT_TABLE); +} + +static int param_get_slot_table_size(char *buffer, struct kernel_param *kp) +{ + return param_get_uint(buffer, kp); +} +#define param_check_slot_table_size(name, p) \ + __param_check(name, p, unsigned int); + +module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries, + slot_table_size, 0644); +module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries, + slot_table_size, 0644); + |