diff options
Diffstat (limited to 'net/sunrpc')
-rw-r--r-- | net/sunrpc/auth.c | 12 | ||||
-rw-r--r-- | net/sunrpc/auth_gss/auth_gss.c | 40 | ||||
-rw-r--r-- | net/sunrpc/auth_gss/gss_krb5_crypto.c | 95 | ||||
-rw-r--r-- | net/sunrpc/auth_gss/gss_krb5_mech.c | 24 | ||||
-rw-r--r-- | net/sunrpc/auth_gss/gss_krb5_seal.c | 2 | ||||
-rw-r--r-- | net/sunrpc/auth_gss/gss_krb5_seqnum.c | 4 | ||||
-rw-r--r-- | net/sunrpc/auth_gss/gss_krb5_wrap.c | 8 | ||||
-rw-r--r-- | net/sunrpc/auth_gss/gss_spkm3_mech.c | 29 | ||||
-rw-r--r-- | net/sunrpc/auth_gss/svcauth_gss.c | 82 | ||||
-rw-r--r-- | net/sunrpc/auth_null.c | 8 | ||||
-rw-r--r-- | net/sunrpc/auth_unix.c | 10 | ||||
-rw-r--r-- | net/sunrpc/clnt.c | 221 | ||||
-rw-r--r-- | net/sunrpc/pmap_clnt.c | 266 | ||||
-rw-r--r-- | net/sunrpc/rpc_pipe.c | 56 | ||||
-rw-r--r-- | net/sunrpc/sched.c | 107 | ||||
-rw-r--r-- | net/sunrpc/socklib.c | 2 | ||||
-rw-r--r-- | net/sunrpc/sunrpc_syms.c | 5 | ||||
-rw-r--r-- | net/sunrpc/svc.c | 509 | ||||
-rw-r--r-- | net/sunrpc/svcauth.c | 4 | ||||
-rw-r--r-- | net/sunrpc/svcauth_unix.c | 43 | ||||
-rw-r--r-- | net/sunrpc/svcsock.c | 427 | ||||
-rw-r--r-- | net/sunrpc/timer.c | 2 | ||||
-rw-r--r-- | net/sunrpc/xdr.c | 54 | ||||
-rw-r--r-- | net/sunrpc/xprt.c | 90 | ||||
-rw-r--r-- | net/sunrpc/xprtsock.c | 124 |
25 files changed, 1465 insertions, 759 deletions
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 55163af3dca..993ff1a5d94 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -331,8 +331,8 @@ rpcauth_unbindcred(struct rpc_task *task) task->tk_msg.rpc_cred = NULL; } -u32 * -rpcauth_marshcred(struct rpc_task *task, u32 *p) +__be32 * +rpcauth_marshcred(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; @@ -342,8 +342,8 @@ rpcauth_marshcred(struct rpc_task *task, u32 *p) return cred->cr_ops->crmarshal(task, p); } -u32 * -rpcauth_checkverf(struct rpc_task *task, u32 *p) +__be32 * +rpcauth_checkverf(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; @@ -355,7 +355,7 @@ rpcauth_checkverf(struct rpc_task *task, u32 *p) int rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, - u32 *data, void *obj) + __be32 *data, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; @@ -369,7 +369,7 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, - u32 *data, void *obj) + __be32 *data, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index ef1cf5b476c..a6ed2d22a6e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -88,7 +88,6 @@ struct gss_auth { struct list_head upcalls; struct rpc_clnt *client; struct dentry *dentry; - char path[48]; spinlock_t lock; }; @@ -690,10 +689,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) if (err) goto err_put_mech; - snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", - clnt->cl_pathname, - gss_auth->mech->gm_name); - gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); + gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, + clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); if (IS_ERR(gss_auth->dentry)) { err = PTR_ERR(gss_auth->dentry); goto err_put_mech; @@ -829,14 +826,14 @@ out: * Marshal credentials. * Maybe we should keep a cached credential for performance reasons. */ -static u32 * -gss_marshal(struct rpc_task *task, u32 *p) +static __be32 * +gss_marshal(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 *cred_len; + __be32 *cred_len; struct rpc_rqst *req = task->tk_rqstp; u32 maj_stat = 0; struct xdr_netobj mic; @@ -897,12 +894,12 @@ gss_refresh(struct rpc_task *task) return 0; } -static u32 * -gss_validate(struct rpc_task *task, u32 *p) +static __be32 * +gss_validate(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 seq; + __be32 seq; struct kvec iov; struct xdr_buf verf_buf; struct xdr_netobj mic; @@ -943,13 +940,14 @@ out_bad: static inline int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj) + kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj) { struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; struct xdr_buf integ_buf; - u32 *integ_len = NULL; + __be32 *integ_len = NULL; struct xdr_netobj mic; - u32 offset, *q; + u32 offset; + __be32 *q; struct kvec *iov; u32 maj_stat = 0; int status = -EIO; @@ -1035,13 +1033,13 @@ out: static inline int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj) + kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj) { struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; u32 offset; u32 maj_stat; int status; - u32 *opaque_len; + __be32 *opaque_len; struct page **inpages; int first; int pad; @@ -1098,7 +1096,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, static int gss_wrap_req(struct rpc_task *task, - kxdrproc_t encode, void *rqstp, u32 *p, void *obj) + kxdrproc_t encode, void *rqstp, __be32 *p, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, @@ -1135,7 +1133,7 @@ out: static inline int gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - struct rpc_rqst *rqstp, u32 **p) + struct rpc_rqst *rqstp, __be32 **p) { struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct xdr_buf integ_buf; @@ -1172,7 +1170,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, static inline int gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - struct rpc_rqst *rqstp, u32 **p) + struct rpc_rqst *rqstp, __be32 **p) { struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; u32 offset; @@ -1201,13 +1199,13 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, static int gss_unwrap_resp(struct rpc_task *task, - kxdrproc_t decode, void *rqstp, u32 *p, void *obj) + kxdrproc_t decode, void *rqstp, __be32 *p, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 *savedp = p; + __be32 *savedp = p; struct kvec *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head; int savedlen = head->iov_len; int status = -EIO; diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 76b969e6904..e11a40b25cc 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -34,6 +34,7 @@ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ +#include <linux/err.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/slab.h> @@ -49,7 +50,7 @@ u32 krb5_encrypt( - struct crypto_tfm *tfm, + struct crypto_blkcipher *tfm, void * iv, void * in, void * out, @@ -58,26 +59,27 @@ krb5_encrypt( u32 ret = -EINVAL; struct scatterlist sg[1]; u8 local_iv[16] = {0}; + struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; dprintk("RPC: krb5_encrypt: input data:\n"); print_hexl((u32 *)in, length, 0); - if (length % crypto_tfm_alg_blocksize(tfm) != 0) + if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; - if (crypto_tfm_alg_ivsize(tfm) > 16) { + if (crypto_blkcipher_ivsize(tfm) > 16) { dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", - crypto_tfm_alg_ivsize(tfm)); + crypto_blkcipher_ivsize(tfm)); goto out; } if (iv) - memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm)); + memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm)); memcpy(out, in, length); sg_set_buf(sg, out, length); - ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv); + ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length); dprintk("RPC: krb5_encrypt: output data:\n"); print_hexl((u32 *)out, length, 0); @@ -90,7 +92,7 @@ EXPORT_SYMBOL(krb5_encrypt); u32 krb5_decrypt( - struct crypto_tfm *tfm, + struct crypto_blkcipher *tfm, void * iv, void * in, void * out, @@ -99,25 +101,26 @@ krb5_decrypt( u32 ret = -EINVAL; struct scatterlist sg[1]; u8 local_iv[16] = {0}; + struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; dprintk("RPC: krb5_decrypt: input data:\n"); print_hexl((u32 *)in, length, 0); - if (length % crypto_tfm_alg_blocksize(tfm) != 0) + if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; - if (crypto_tfm_alg_ivsize(tfm) > 16) { + if (crypto_blkcipher_ivsize(tfm) > 16) { dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n", - crypto_tfm_alg_ivsize(tfm)); + crypto_blkcipher_ivsize(tfm)); goto out; } if (iv) - memcpy(local_iv,iv, crypto_tfm_alg_ivsize(tfm)); + memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm)); memcpy(out, in, length); sg_set_buf(sg, out, length); - ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv); + ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length); dprintk("RPC: krb5_decrypt: output_data:\n"); print_hexl((u32 *)out, length, 0); @@ -197,11 +200,9 @@ out: static int checksummer(struct scatterlist *sg, void *data) { - struct crypto_tfm *tfm = (struct crypto_tfm *)data; + struct hash_desc *desc = data; - crypto_digest_update(tfm, sg, 1); - - return 0; + return crypto_hash_update(desc, sg, sg->length); } /* checksum the plaintext data and hdrlen bytes of the token header */ @@ -210,8 +211,9 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, int body_offset, struct xdr_netobj *cksum) { char *cksumname; - struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ + struct hash_desc desc; /* XXX add to ctx? */ struct scatterlist sg[1]; + int err; switch (cksumtype) { case CKSUMTYPE_RSA_MD5: @@ -222,25 +224,35 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, " unsupported checksum %d", cksumtype); return GSS_S_FAILURE; } - if (!(tfm = crypto_alloc_tfm(cksumname, CRYPTO_TFM_REQ_MAY_SLEEP))) + desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(desc.tfm)) return GSS_S_FAILURE; - cksum->len = crypto_tfm_alg_digestsize(tfm); + cksum->len = crypto_hash_digestsize(desc.tfm); + desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - crypto_digest_init(tfm); + err = crypto_hash_init(&desc); + if (err) + goto out; sg_set_buf(sg, header, hdrlen); - crypto_digest_update(tfm, sg, 1); - process_xdr_buf(body, body_offset, body->len - body_offset, - checksummer, tfm); - crypto_digest_final(tfm, cksum->data); - crypto_free_tfm(tfm); - return 0; + err = crypto_hash_update(&desc, sg, hdrlen); + if (err) + goto out; + err = process_xdr_buf(body, body_offset, body->len - body_offset, + checksummer, &desc); + if (err) + goto out; + err = crypto_hash_final(&desc, cksum->data); + +out: + crypto_free_hash(desc.tfm); + return err ? GSS_S_FAILURE : 0; } EXPORT_SYMBOL(make_checksum); struct encryptor_desc { u8 iv[8]; /* XXX hard-coded blocksize */ - struct crypto_tfm *tfm; + struct blkcipher_desc desc; int pos; struct xdr_buf *outbuf; struct page **pages; @@ -285,8 +297,8 @@ encryptor(struct scatterlist *sg, void *data) if (thislen == 0) return 0; - ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags, - thislen, desc->iv); + ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags, + desc->infrags, thislen); if (ret) return ret; if (fraglen) { @@ -305,16 +317,18 @@ encryptor(struct scatterlist *sg, void *data) } int -gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset, - struct page **pages) +gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, + int offset, struct page **pages) { int ret; struct encryptor_desc desc; - BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); memset(desc.iv, 0, sizeof(desc.iv)); - desc.tfm = tfm; + desc.desc.tfm = tfm; + desc.desc.info = desc.iv; + desc.desc.flags = 0; desc.pos = offset; desc.outbuf = buf; desc.pages = pages; @@ -329,7 +343,7 @@ EXPORT_SYMBOL(gss_encrypt_xdr_buf); struct decryptor_desc { u8 iv[8]; /* XXX hard-coded blocksize */ - struct crypto_tfm *tfm; + struct blkcipher_desc desc; struct scatterlist frags[4]; int fragno; int fraglen; @@ -355,8 +369,8 @@ decryptor(struct scatterlist *sg, void *data) if (thislen == 0) return 0; - ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags, - thislen, desc->iv); + ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags, + desc->frags, thislen); if (ret) return ret; if (fraglen) { @@ -373,15 +387,18 @@ decryptor(struct scatterlist *sg, void *data) } int -gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset) +gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, + int offset) { struct decryptor_desc desc; /* XXXJBF: */ - BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); memset(desc.iv, 0, sizeof(desc.iv)); - desc.tfm = tfm; + desc.desc.tfm = tfm; + desc.desc.info = desc.iv; + desc.desc.flags = 0; desc.fragno = 0; desc.fraglen = 0; return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 70e1e53a632..325e72e4fd3 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -34,6 +34,7 @@ * */ +#include <linux/err.h> #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> @@ -78,10 +79,10 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) } static inline const void * -get_key(const void *p, const void *end, struct crypto_tfm **res) +get_key(const void *p, const void *end, struct crypto_blkcipher **res) { struct xdr_netobj key; - int alg, alg_mode; + int alg; char *alg_name; p = simple_get_bytes(p, end, &alg, sizeof(alg)); @@ -93,18 +94,19 @@ get_key(const void *p, const void *end, struct crypto_tfm **res) switch (alg) { case ENCTYPE_DES_CBC_RAW: - alg_name = "des"; - alg_mode = CRYPTO_TFM_MODE_CBC; + alg_name = "cbc(des)"; break; default: printk("gss_kerberos_mech: unsupported algorithm %d\n", alg); goto out_err_free_key; } - if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) { + *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(*res)) { printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name); + *res = NULL; goto out_err_free_key; } - if (crypto_cipher_setkey(*res, key.data, key.len)) { + if (crypto_blkcipher_setkey(*res, key.data, key.len)) { printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name); goto out_err_free_tfm; } @@ -113,7 +115,7 @@ get_key(const void *p, const void *end, struct crypto_tfm **res) return p; out_err_free_tfm: - crypto_free_tfm(*res); + crypto_free_blkcipher(*res); out_err_free_key: kfree(key.data); p = ERR_PTR(-EINVAL); @@ -172,9 +174,9 @@ gss_import_sec_context_kerberos(const void *p, return 0; out_err_free_key2: - crypto_free_tfm(ctx->seq); + crypto_free_blkcipher(ctx->seq); out_err_free_key1: - crypto_free_tfm(ctx->enc); + crypto_free_blkcipher(ctx->enc); out_err_free_mech: kfree(ctx->mech_used.data); out_err_free_ctx: @@ -187,8 +189,8 @@ static void gss_delete_sec_context_kerberos(void *internal_ctx) { struct krb5_ctx *kctx = internal_ctx; - crypto_free_tfm(kctx->seq); - crypto_free_tfm(kctx->enc); + crypto_free_blkcipher(kctx->seq); + crypto_free_blkcipher(kctx->enc); kfree(kctx->mech_used.data); kfree(kctx); } diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 2f312164d6d..08601ee4cd7 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -115,7 +115,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, krb5_hdr = ptr - 2; msg_start = krb5_hdr + 24; - *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); + *(__be16 *)(krb5_hdr + 2) = htons(ctx->signalg); memset(krb5_hdr + 4, 0xff, 4); if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum)) diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index c53ead39118..c604baf3a5f 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -41,7 +41,7 @@ #endif s32 -krb5_make_seq_num(struct crypto_tfm *key, +krb5_make_seq_num(struct crypto_blkcipher *key, int direction, s32 seqnum, unsigned char *cksum, unsigned char *buf) @@ -62,7 +62,7 @@ krb5_make_seq_num(struct crypto_tfm *key, } s32 -krb5_get_seq_num(struct crypto_tfm *key, +krb5_get_seq_num(struct crypto_blkcipher *key, unsigned char *cksum, unsigned char *buf, int *direction, s32 * seqnum) diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 89d1f3e1412..cc45c1605f8 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -149,7 +149,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, goto out_err; } - blocksize = crypto_tfm_alg_blocksize(kctx->enc); + blocksize = crypto_blkcipher_blocksize(kctx->enc); gss_krb5_add_padding(buf, offset, blocksize); BUG_ON((buf->len - offset) % blocksize); plainlen = blocksize + buf->len - offset; @@ -177,9 +177,9 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, msg_start = krb5_hdr + 24; /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize); - *(u16 *)(krb5_hdr + 2) = htons(kctx->signalg); + *(__be16 *)(krb5_hdr + 2) = htons(kctx->signalg); memset(krb5_hdr + 4, 0xff, 4); - *(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg); + *(__be16 *)(krb5_hdr + 4) = htons(kctx->sealalg); make_confounder(msg_start, blocksize); @@ -346,7 +346,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) /* Copy the data back to the right position. XXX: Would probably be * better to copy and encrypt at the same time. */ - blocksize = crypto_tfm_alg_blocksize(kctx->enc); + blocksize = crypto_blkcipher_blocksize(kctx->enc); data_start = ptr + 22 + blocksize; orig_start = buf->head[0].iov_base + offset; data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 88dcb52d171..bdedf456bc1 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -34,6 +34,7 @@ * */ +#include <linux/err.h> #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> @@ -83,10 +84,11 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) } static inline const void * -get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) +get_key(const void *p, const void *end, struct crypto_blkcipher **res, + int *resalg) { struct xdr_netobj key = { 0 }; - int alg_mode,setkey = 0; + int setkey = 0; char *alg_name; p = simple_get_bytes(p, end, resalg, sizeof(*resalg)); @@ -98,14 +100,12 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) switch (*resalg) { case NID_des_cbc: - alg_name = "des"; - alg_mode = CRYPTO_TFM_MODE_CBC; + alg_name = "cbc(des)"; setkey = 1; break; case NID_cast5_cbc: /* XXXX here in name only, not used */ - alg_name = "cast5"; - alg_mode = CRYPTO_TFM_MODE_CBC; + alg_name = "cbc(cast5)"; setkey = 0; /* XXX will need to set to 1 */ break; case NID_md5: @@ -113,19 +113,20 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n"); } alg_name = "md5"; - alg_mode = 0; setkey = 0; break; default: dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg); goto out_err_free_key; } - if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) { + *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(*res)) { printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name); + *res = NULL; goto out_err_free_key; } if (setkey) { - if (crypto_cipher_setkey(*res, key.data, key.len)) { + if (crypto_blkcipher_setkey(*res, key.data, key.len)) { printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name); goto out_err_free_tfm; } @@ -136,7 +137,7 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) return p; out_err_free_tfm: - crypto_free_tfm(*res); + crypto_free_blkcipher(*res); out_err_free_key: if(key.len > 0) kfree(key.data); @@ -204,9 +205,9 @@ gss_import_sec_context_spkm3(const void *p, size_t len, return 0; out_err_free_key2: - crypto_free_tfm(ctx->derived_integ_key); + crypto_free_blkcipher(ctx->derived_integ_key); out_err_free_key1: - crypto_free_tfm(ctx->derived_conf_key); + crypto_free_blkcipher(ctx->derived_conf_key); out_err_free_s_key: kfree(ctx->share_key.data); out_err_free_mech: @@ -223,8 +224,8 @@ static void gss_delete_sec_context_spkm3(void *internal_ctx) { struct spkm3_ctx *sctx = internal_ctx; - crypto_free_tfm(sctx->derived_integ_key); - crypto_free_tfm(sctx->derived_conf_key); + crypto_free_blkcipher(sctx->derived_integ_key); + crypto_free_blkcipher(sctx->derived_conf_key); kfree(sctx->share_key.data); kfree(sctx->mech_used.data); kfree(sctx); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 94217ec9e2d..638c0b57620 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -607,7 +607,7 @@ svc_safe_getnetobj(struct kvec *argv, struct xdr_netobj *o) if (argv->iov_len < 4) return -1; - o->len = ntohl(svc_getu32(argv)); + o->len = svc_getnl(argv); l = round_up_to_quad(o->len); if (argv->iov_len < l) return -1; @@ -620,17 +620,17 @@ svc_safe_getnetobj(struct kvec *argv, struct xdr_netobj *o) static inline int svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o) { - u32 *p; + u8 *p; if (resv->iov_len + 4 > PAGE_SIZE) return -1; - svc_putu32(resv, htonl(o->len)); + svc_putnl(resv, o->len); p = resv->iov_base + resv->iov_len; resv->iov_len += round_up_to_quad(o->len); if (resv->iov_len > PAGE_SIZE) return -1; memcpy(p, o->data, o->len); - memset((u8 *)p + o->len, 0, round_up_to_quad(o->len) - o->len); + memset(p + o->len, 0, round_up_to_quad(o->len) - o->len); return 0; } @@ -640,7 +640,7 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o) */ static int gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, - u32 *rpcstart, struct rpc_gss_wire_cred *gc, u32 *authp) + __be32 *rpcstart, struct rpc_gss_wire_cred *gc, __be32 *authp) { struct gss_ctx *ctx_id = rsci->mechctx; struct xdr_buf rpchdr; @@ -657,7 +657,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, *authp = rpc_autherr_badverf; if (argv->iov_len < 4) return SVC_DENIED; - flavor = ntohl(svc_getu32(argv)); + flavor = svc_getnl(argv); if (flavor != RPC_AUTH_GSS) return SVC_DENIED; if (svc_safe_getnetobj(argv, &checksum)) @@ -687,9 +687,9 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, static int gss_write_null_verf(struct svc_rqst *rqstp) { - u32 *p; + __be32 *p; - svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_NULL)); + svc_putnl(rqstp->rq_res.head, RPC_AUTH_NULL); p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; /* don't really need to check if head->iov_len > PAGE_SIZE ... */ *p++ = 0; @@ -701,14 +701,14 @@ gss_write_null_verf(struct svc_rqst *rqstp) static int gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) { - u32 xdr_seq; + __be32 xdr_seq; u32 maj_stat; struct xdr_buf verf_data; struct xdr_netobj mic; - u32 *p; + __be32 *p; struct kvec iov; - svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_GSS)); + svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS); xdr_seq = htonl(seq); iov.iov_base = &xdr_seq; @@ -782,7 +782,7 @@ EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor); static inline int read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) { - u32 raw; + __be32 raw; int status; status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); @@ -805,7 +805,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) struct xdr_netobj mic; struct xdr_buf integ_buf; - integ_len = ntohl(svc_getu32(&buf->head[0])); + integ_len = svc_getnl(&buf->head[0]); if (integ_len & 3) goto out; if (integ_len > buf->len) @@ -825,7 +825,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) maj_stat = gss_verify_mic(ctx, &integ_buf, &mic); if (maj_stat != GSS_S_COMPLETE) goto out; - if (ntohl(svc_getu32(&buf->head[0])) != seq) + if (svc_getnl(&buf->head[0]) != seq) goto out; stat = 0; out: @@ -857,7 +857,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs rqstp->rq_sendfile_ok = 0; - priv_len = ntohl(svc_getu32(&buf->head[0])); + priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { /* Already decrypted last time through! The sequence number * check at out_seq is unnecessary but harmless: */ @@ -895,7 +895,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs if (maj_stat != GSS_S_COMPLETE) return -EINVAL; out_seq: - if (ntohl(svc_getu32(&buf->head[0])) != seq) + if (svc_getnl(&buf->head[0]) != seq) return -EINVAL; return 0; } @@ -905,7 +905,7 @@ struct gss_svc_data { struct rpc_gss_wire_cred clcred; /* pointer to the beginning of the procedure-specific results, * which may be encrypted/checksummed in svcauth_gss_release: */ - u32 *body_start; + __be32 *body_start; struct rsc *rsci; }; @@ -946,7 +946,7 @@ gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip) * response here and return SVC_COMPLETE. */ static int -svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) +svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -956,8 +956,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) struct rpc_gss_wire_cred *gc; struct rsc *rsci = NULL; struct rsi *rsip, rsikey; - u32 *rpcstart; - u32 *reject_stat = resv->iov_base + resv->iov_len; + __be32 *rpcstart; + __be32 *reject_stat = resv->iov_base + resv->iov_len; int ret; dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n",argv->iov_len); @@ -985,12 +985,12 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) if (argv->iov_len < 5 * 4) goto auth_err; - crlen = ntohl(svc_getu32(argv)); - if (ntohl(svc_getu32(argv)) != RPC_GSS_VERSION) + crlen = svc_getnl(argv); + if (svc_getnl(argv) != RPC_GSS_VERSION) goto auth_err; - gc->gc_proc = ntohl(svc_getu32(argv)); - gc->gc_seq = ntohl(svc_getu32(argv)); - gc->gc_svc = ntohl(svc_getu32(argv)); + gc->gc_proc = svc_getnl(argv); + gc->gc_seq = svc_getnl(argv); + gc->gc_svc = svc_getnl(argv); if (svc_safe_getnetobj(argv, &gc->gc_ctx)) goto auth_err; if (crlen != round_up_to_quad(gc->gc_ctx.len) + 5 * 4) @@ -1016,9 +1016,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) case RPC_GSS_PROC_CONTINUE_INIT: if (argv->iov_len < 2 * 4) goto auth_err; - if (ntohl(svc_getu32(argv)) != RPC_AUTH_NULL) + if (svc_getnl(argv) != RPC_AUTH_NULL) goto auth_err; - if (ntohl(svc_getu32(argv)) != 0) + if (svc_getnl(argv) != 0) goto auth_err; break; case RPC_GSS_PROC_DATA: @@ -1076,14 +1076,14 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) goto drop; if (resv->iov_len + 4 > PAGE_SIZE) goto drop; - svc_putu32(resv, rpc_success); + svc_putnl(resv, RPC_SUCCESS); if (svc_safe_putnetobj(resv, &rsip->out_handle)) goto drop; if (resv->iov_len + 3 * 4 > PAGE_SIZE) goto drop; - svc_putu32(resv, htonl(rsip->major_status)); - svc_putu32(resv, htonl(rsip->minor_status)); - svc_putu32(resv, htonl(GSS_SEQ_WIN)); + svc_putnl(resv, rsip->major_status); + svc_putnl(resv, rsip->minor_status); + svc_putnl(resv, GSS_SEQ_WIN); if (svc_safe_putnetobj(resv, &rsip->out_token)) goto drop; rqstp->rq_client = NULL; @@ -1093,7 +1093,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) set_bit(CACHE_NEGATIVE, &rsci->h.flags); if (resv->iov_len + 4 > PAGE_SIZE) goto drop; - svc_putu32(resv, rpc_success); + svc_putnl(resv, RPC_SUCCESS); goto complete; case RPC_GSS_PROC_DATA: *authp = rpcsec_gsserr_ctxproblem; @@ -1111,8 +1111,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) goto auth_err; /* placeholders for length and seq. number: */ svcdata->body_start = resv->iov_base + resv->iov_len; - svc_putu32(resv, 0); - svc_putu32(resv, 0); + svc_putnl(resv, 0); + svc_putnl(resv, 0); break; case RPC_GSS_SVC_PRIVACY: if (unwrap_priv_data(rqstp, &rqstp->rq_arg, @@ -1120,8 +1120,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) goto auth_err; /* placeholders for length and seq. number: */ svcdata->body_start = resv->iov_base + resv->iov_len; - svc_putu32(resv, 0); - svc_putu32(resv, 0); + svc_putnl(resv, 0); + svc_putnl(resv, 0); break; default: goto auth_err; @@ -1156,7 +1156,7 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) struct xdr_buf integ_buf; struct xdr_netobj mic; struct kvec *resv; - u32 *p; + __be32 *p; int integ_offset, integ_len; int stat = -EINVAL; @@ -1199,7 +1199,7 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic)) goto out_err; - svc_putu32(resv, htonl(mic.len)); + svc_putnl(resv, mic.len); memset(mic.data + mic.len, 0, round_up_to_quad(mic.len) - mic.len); resv->iov_len += XDR_QUADLEN(mic.len) << 2; @@ -1219,8 +1219,8 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp) struct rpc_gss_wire_cred *gc = &gsd->clcred; struct xdr_buf *resbuf = &rqstp->rq_res; struct page **inpages = NULL; - u32 *p; - int offset, *len; + __be32 *p, *len; + int offset; int pad; p = gsd->body_start; @@ -1264,7 +1264,7 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp) return -ENOMEM; *len = htonl(resbuf->len - offset); pad = 3 - ((resbuf->len - offset - 1)&3); - p = (u32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len); + p = (__be32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len); memset(p, 0, pad); resbuf->tail[0].iov_len += pad; resbuf->len += pad; diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 2eccffa96ba..3be257dc32b 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -60,8 +60,8 @@ nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags) /* * Marshal credential. */ -static u32 * -nul_marshal(struct rpc_task *task, u32 *p) +static __be32 * +nul_marshal(struct rpc_task *task, __be32 *p) { *p++ = htonl(RPC_AUTH_NULL); *p++ = 0; @@ -81,8 +81,8 @@ nul_refresh(struct rpc_task *task) return 0; } -static u32 * -nul_validate(struct rpc_task *task, u32 *p) +static __be32 * +nul_validate(struct rpc_task *task, __be32 *p) { rpc_authflavor_t flavor; u32 size; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 74c7406a105..f7f990c9afe 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -137,12 +137,12 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) * Marshal credentials. * Maybe we should keep a cached credential for performance reasons. */ -static u32 * -unx_marshal(struct rpc_task *task, u32 *p) +static __be32 * +unx_marshal(struct rpc_task *task, __be32 *p) { struct rpc_clnt *clnt = task->tk_client; struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; - u32 *base, *hold; + __be32 *base, *hold; int i; *p++ = htonl(RPC_AUTH_UNIX); @@ -178,8 +178,8 @@ unx_refresh(struct rpc_task *task) return 0; } -static u32 * -unx_validate(struct rpc_task *task, u32 *p) +static __be32 * +unx_validate(struct rpc_task *task, __be32 *p) { rpc_authflavor_t flavor; u32 size; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 3e19d321067..78696f2dc7d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -60,8 +60,8 @@ static void call_refreshresult(struct rpc_task *task); static void call_timeout(struct rpc_task *task); static void call_connect(struct rpc_task *task); static void call_connect_status(struct rpc_task *task); -static u32 * call_header(struct rpc_task *task); -static u32 * call_verify(struct rpc_task *task); +static __be32 * call_header(struct rpc_task *task); +static __be32 * call_verify(struct rpc_task *task); static int @@ -97,17 +97,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) } } -/* - * Create an RPC client - * FIXME: This should also take a flags argument (as in task->tk_flags). - * It's called (among others) from pmap_create_client, which may in - * turn be called by an async task. In this case, rpciod should not be - * made to sleep too long. - */ -struct rpc_clnt * -rpc_new_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *program, u32 vers, - rpc_authflavor_t flavor) +static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor) { struct rpc_version *version; struct rpc_clnt *clnt = NULL; @@ -147,16 +137,12 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_protname = program->name; - clnt->cl_pmap = &clnt->cl_pmap_default; - clnt->cl_port = xprt->addr.sin_port; clnt->cl_prog = program->number; clnt->cl_vers = version->number; - clnt->cl_prot = xprt->prot; clnt->cl_stats = program->stats; clnt->cl_metrics = rpc_alloc_iostats(clnt); - rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait"); - if (!clnt->cl_port) + if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; clnt->cl_rtt = &clnt->cl_rtt_default; @@ -175,10 +161,10 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, } /* save the nodename */ - clnt->cl_nodelen = strlen(system_utsname.nodename); + clnt->cl_nodelen = strlen(utsname()->nodename); if (clnt->cl_nodelen > UNX_MAXNODENAME) clnt->cl_nodelen = UNX_MAXNODENAME; - memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen); + memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen); return clnt; out_no_auth: @@ -191,40 +177,71 @@ out_no_path: kfree(clnt->cl_server); kfree(clnt); out_err: - xprt_destroy(xprt); + xprt_put(xprt); out_no_xprt: return ERR_PTR(err); } -/** - * Create an RPC client - * @xprt - pointer to xprt struct - * @servname - name of server - * @info - rpc_program - * @version - rpc_program version - * @authflavor - rpc_auth flavour to use +/* + * rpc_create - create an RPC client and transport with one call + * @args: rpc_clnt create argument structure * - * Creates an RPC client structure, then pings the server in order to - * determine if it is up, and if it supports this program and version. + * Creates and initializes an RPC transport and an RPC client. * - * This function should never be called by asynchronous tasks such as - * the portmapper. + * It can ping the server in order to determine if it is up, and to see if + * it supports this program and version. RPC_CLNT_CREATE_NOPING disables + * this behavior so asynchronous tasks can also use rpc_create. */ -struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *info, u32 version, rpc_authflavor_t authflavor) +struct rpc_clnt *rpc_create(struct rpc_create_args *args) { + struct rpc_xprt *xprt; struct rpc_clnt *clnt; - int err; - - clnt = rpc_new_client(xprt, servname, info, version, authflavor); + + xprt = xprt_create_transport(args->protocol, args->address, + args->addrsize, args->timeout); + if (IS_ERR(xprt)) + return (struct rpc_clnt *)xprt; + + /* + * By default, kernel RPC client connects from a reserved port. + * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters, + * but it is always enabled for rpciod, which handles the connect + * operation. + */ + xprt->resvport = 1; + if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) + xprt->resvport = 0; + + dprintk("RPC: creating %s client for %s (xprt %p)\n", + args->program->name, args->servername, xprt); + + clnt = rpc_new_client(xprt, args->servername, args->program, + args->version, args->authflavor); if (IS_ERR(clnt)) return clnt; - err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); - if (err == 0) - return clnt; - rpc_shutdown_client(clnt); - return ERR_PTR(err); + + if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { + int err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); + if (err != 0) { + rpc_shutdown_client(clnt); + return ERR_PTR(err); + } + } + + clnt->cl_softrtry = 1; + if (args->flags & RPC_CLNT_CREATE_HARDRTRY) + clnt->cl_softrtry = 0; + + if (args->flags & RPC_CLNT_CREATE_INTR) + clnt->cl_intr = 1; + if (args->flags & RPC_CLNT_CREATE_AUTOBIND) + clnt->cl_autobind = 1; + if (args->flags & RPC_CLNT_CREATE_ONESHOT) + clnt->cl_oneshot = 1; + + return clnt; } +EXPORT_SYMBOL_GPL(rpc_create); /* * This function clones the RPC client structure. It allows us to share the @@ -244,8 +261,7 @@ rpc_clone_client(struct rpc_clnt *clnt) atomic_set(&new->cl_users, 0); new->cl_parent = clnt; atomic_inc(&clnt->cl_count); - /* Duplicate portmapper */ - rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait"); + new->cl_xprt = xprt_get(clnt->cl_xprt); /* Turn off autobind on clones */ new->cl_autobind = 0; new->cl_oneshot = 0; @@ -255,8 +271,7 @@ rpc_clone_client(struct rpc_clnt *clnt) rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); - new->cl_pmap = &new->cl_pmap_default; - new->cl_metrics = rpc_alloc_iostats(clnt); + new->cl_metrics = rpc_alloc_iostats(clnt); return new; out_no_clnt: printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); @@ -323,15 +338,12 @@ rpc_destroy_client(struct rpc_clnt *clnt) rpc_rmdir(clnt->cl_dentry); rpc_put_mount(); } - if (clnt->cl_xprt) { - xprt_destroy(clnt->cl_xprt); - clnt->cl_xprt = NULL; - } if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); out_free: rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; + xprt_put(clnt->cl_xprt); kfree(clnt); return 0; } @@ -540,6 +552,40 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) task->tk_action = rpc_exit_task; } +/** + * rpc_peeraddr - extract remote peer address from clnt's xprt + * @clnt: RPC client structure + * @buf: target buffer + * @size: length of target buffer + * + * Returns the number of bytes that are actually in the stored address. + */ +size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize) +{ + size_t bytes; + struct rpc_xprt *xprt = clnt->cl_xprt; + + bytes = sizeof(xprt->addr); + if (bytes > bufsize) + bytes = bufsize; + memcpy(buf, &clnt->cl_xprt->addr, bytes); + return xprt->addrlen; +} +EXPORT_SYMBOL_GPL(rpc_peeraddr); + +/** + * rpc_peeraddr2str - return remote peer address in printable format + * @clnt: RPC client structure + * @format: address format + * + */ +char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) +{ + struct rpc_xprt *xprt = clnt->cl_xprt; + return xprt->ops->print_addr(xprt, format); +} +EXPORT_SYMBOL_GPL(rpc_peeraddr2str); + void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { @@ -560,7 +606,7 @@ size_t rpc_max_payload(struct rpc_clnt *clnt) { return clnt->cl_xprt->max_payload; } -EXPORT_SYMBOL(rpc_max_payload); +EXPORT_SYMBOL_GPL(rpc_max_payload); /** * rpc_force_rebind - force transport to check that remote port is unchanged @@ -570,9 +616,9 @@ EXPORT_SYMBOL(rpc_max_payload); void rpc_force_rebind(struct rpc_clnt *clnt) { if (clnt->cl_autobind) - clnt->cl_port = 0; + xprt_clear_bound(clnt->cl_xprt); } -EXPORT_SYMBOL(rpc_force_rebind); +EXPORT_SYMBOL_GPL(rpc_force_rebind); /* * Restart an (async) RPC call. Usually called from within the @@ -736,7 +782,7 @@ call_encode(struct rpc_task *task) struct xdr_buf *rcvbuf = &req->rq_rcv_buf; unsigned int bufsiz; kxdrproc_t encode; - u32 *p; + __be32 *p; dprintk("RPC: %4d call_encode (status %d)\n", task->tk_pid, task->tk_status); @@ -781,16 +827,16 @@ call_encode(struct rpc_task *task) static void call_bind(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = task->tk_xprt; dprintk("RPC: %4d call_bind (status %d)\n", task->tk_pid, task->tk_status); task->tk_action = call_connect; - if (!clnt->cl_port) { + if (!xprt_bound(xprt)) { task->tk_action = call_bind_status; - task->tk_timeout = task->tk_xprt->bind_timeout; - rpc_getport(task, clnt); + task->tk_timeout = xprt->bind_timeout; + xprt->ops->rpcbind(task); } } @@ -815,15 +861,11 @@ call_bind_status(struct rpc_task *task) dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n", task->tk_pid); rpc_delay(task, 3*HZ); - goto retry_bind; + goto retry_timeout; case -ETIMEDOUT: dprintk("RPC: %4d rpcbind request timed out\n", task->tk_pid); - if (RPC_IS_SOFT(task)) { - status = -EIO; - break; - } - goto retry_bind; + goto retry_timeout; case -EPFNOSUPPORT: dprintk("RPC: %4d remote rpcbind service unavailable\n", task->tk_pid); @@ -836,16 +878,13 @@ call_bind_status(struct rpc_task *task) dprintk("RPC: %4d unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); status = -EIO; - break; } rpc_exit(task, status); return; -retry_bind: - task->tk_status = 0; - task->tk_action = call_bind; - return; +retry_timeout: + task->tk_action = call_timeout; } /* @@ -893,14 +932,16 @@ call_connect_status(struct rpc_task *task) switch (status) { case -ENOTCONN: - case -ETIMEDOUT: case -EAGAIN: task->tk_action = call_bind; - break; - default: - rpc_exit(task, -EIO); - break; + if (!RPC_IS_SOFT(task)) + return; + /* if soft mounted, test if we've timed out */ + case -ETIMEDOUT: + task->tk_action = call_timeout; + return; } + rpc_exit(task, -EIO); } /* @@ -982,6 +1023,14 @@ call_status(struct rpc_task *task) task->tk_status = 0; switch(status) { + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + /* + * Delay any retries for 3 seconds, then handle as if it + * were a timeout. + */ + rpc_delay(task, 3*HZ); case -ETIMEDOUT: task->tk_action = call_timeout; break; @@ -1001,7 +1050,6 @@ call_status(struct rpc_task *task) printk("%s: RPC call returned error %d\n", clnt->cl_protname, -status); rpc_exit(task, status); - break; } } @@ -1052,7 +1100,7 @@ call_decode(struct rpc_task *task) struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; kxdrproc_t decode = task->tk_msg.rpc_proc->p_decode; - u32 *p; + __be32 *p; dprintk("RPC: %4d call_decode (status %d)\n", task->tk_pid, task->tk_status); @@ -1069,10 +1117,10 @@ call_decode(struct rpc_task *task) clnt->cl_stats->rpcretrans++; goto out_retry; } - printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n", + dprintk("%s: too small RPC reply size (%d bytes)\n", clnt->cl_protname, task->tk_status); - rpc_exit(task, -EIO); - return; + task->tk_action = call_timeout; + goto out_retry; } /* @@ -1149,12 +1197,12 @@ call_refreshresult(struct rpc_task *task) /* * Call header serialization */ -static u32 * +static __be32 * call_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; - u32 *p = req->rq_svec[0].iov_base; + __be32 *p = req->rq_svec[0].iov_base; /* FIXME: check buffer size? */ @@ -1173,12 +1221,13 @@ call_header(struct rpc_task *task) /* * Reply header verification */ -static u32 * +static __be32 * call_verify(struct rpc_task *task) { struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; int len = task->tk_rqstp->rq_rcv_buf.len >> 2; - u32 *p = iov->iov_base, n; + __be32 *p = iov->iov_base; + u32 n; int error = -EACCES; if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) { @@ -1255,7 +1304,7 @@ call_verify(struct rpc_task *task) printk(KERN_WARNING "call_verify: auth check failed\n"); goto out_garbage; /* bad verifier, retry */ } - len = p - (u32 *)iov->iov_base - 1; + len = p - (__be32 *)iov->iov_base - 1; if (len < 0) goto out_overflow; switch ((n = ntohl(*p++))) { @@ -1310,12 +1359,12 @@ out_overflow: goto out_garbage; } -static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj) +static int rpcproc_encode_null(void *rqstp, __be32 *data, void *obj) { return 0; } -static int rpcproc_decode_null(void *rqstp, u32 *data, void *obj) +static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj) { return 0; } diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 623180f224c..919d5ba7ca0 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -1,7 +1,9 @@ /* - * linux/net/sunrpc/pmap.c + * linux/net/sunrpc/pmap_clnt.c * - * Portmapper client. + * In-kernel RPC portmapper client. + * + * Portmapper supports version 2 of the rpcbind protocol (RFC 1833). * * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> */ @@ -13,7 +15,6 @@ #include <linux/uio.h> #include <linux/in.h> #include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/xprt.h> #include <linux/sunrpc/sched.h> #ifdef RPC_DEBUG @@ -24,80 +25,141 @@ #define PMAP_UNSET 2 #define PMAP_GETPORT 3 +struct portmap_args { + u32 pm_prog; + u32 pm_vers; + u32 pm_prot; + unsigned short pm_port; + struct rpc_xprt * pm_xprt; +}; + static struct rpc_procinfo pmap_procedures[]; static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int); -static void pmap_getport_done(struct rpc_task *); +static void pmap_getport_done(struct rpc_task *, void *); static struct rpc_program pmap_program; -static DEFINE_SPINLOCK(pmap_lock); -/* - * Obtain the port for a given RPC service on a given host. This one can - * be called for an ongoing RPC request. - */ -void -rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) +static void pmap_getport_prepare(struct rpc_task *task, void *calldata) { - struct rpc_portmap *map = clnt->cl_pmap; - struct sockaddr_in *sap = &clnt->cl_xprt->addr; + struct portmap_args *map = calldata; struct rpc_message msg = { .rpc_proc = &pmap_procedures[PMAP_GETPORT], .rpc_argp = map, - .rpc_resp = &clnt->cl_port, - .rpc_cred = NULL + .rpc_resp = &map->pm_port, }; + + rpc_call_setup(task, &msg, 0); +} + +static inline struct portmap_args *pmap_map_alloc(void) +{ + return kmalloc(sizeof(struct portmap_args), GFP_NOFS); +} + +static inline void pmap_map_free(struct portmap_args *map) +{ + kfree(map); +} + +static void pmap_map_release(void *data) +{ + pmap_map_free(data); +} + +static const struct rpc_call_ops pmap_getport_ops = { + .rpc_call_prepare = pmap_getport_prepare, + .rpc_call_done = pmap_getport_done, + .rpc_release = pmap_map_release, +}; + +static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status) +{ + xprt_clear_binding(xprt); + rpc_wake_up_status(&xprt->binding, status); +} + +/** + * rpc_getport - obtain the port for a given RPC service on a given host + * @task: task that is waiting for portmapper request + * + * This one can be called for an ongoing RPC request, and can be used in + * an async (rpciod) context. + */ +void rpc_getport(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = task->tk_xprt; + struct sockaddr_in addr; + struct portmap_args *map; struct rpc_clnt *pmap_clnt; - struct rpc_task *child; + struct rpc_task *child; + int status; - dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n", + dprintk("RPC: %4d rpc_getport(%s, %u, %u, %d)\n", task->tk_pid, clnt->cl_server, - map->pm_prog, map->pm_vers, map->pm_prot); + clnt->cl_prog, clnt->cl_vers, xprt->prot); /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); - spin_lock(&pmap_lock); - if (map->pm_binding) { - rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL); - spin_unlock(&pmap_lock); + if (xprt_test_and_set_binding(xprt)) { + task->tk_status = -EACCES; /* tell caller to check again */ + rpc_sleep_on(&xprt->binding, task, NULL, NULL); return; } - map->pm_binding = 1; - spin_unlock(&pmap_lock); - pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0); - if (IS_ERR(pmap_clnt)) { - task->tk_status = PTR_ERR(pmap_clnt); + /* Someone else may have bound if we slept */ + status = 0; + if (xprt_bound(xprt)) + goto bailout_nofree; + + status = -ENOMEM; + map = pmap_map_alloc(); + if (!map) + goto bailout_nofree; + map->pm_prog = clnt->cl_prog; + map->pm_vers = clnt->cl_vers; + map->pm_prot = xprt->prot; + map->pm_port = 0; + map->pm_xprt = xprt_get(xprt); + + rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr)); + pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0); + status = PTR_ERR(pmap_clnt); + if (IS_ERR(pmap_clnt)) goto bailout; - } - task->tk_status = 0; - /* - * Note: rpc_new_child will release client after a failure. - */ - if (!(child = rpc_new_child(pmap_clnt, task))) + status = -EIO; + child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map); + if (IS_ERR(child)) goto bailout; + rpc_release_task(child); - /* Setup the call info struct */ - rpc_call_setup(child, &msg, 0); + rpc_sleep_on(&xprt->binding, task, NULL, NULL); - /* ... and run the child task */ task->tk_xprt->stat.bind_count++; - rpc_run_child(task, child, pmap_getport_done); return; bailout: - spin_lock(&pmap_lock); - map->pm_binding = 0; - rpc_wake_up(&map->pm_bindwait); - spin_unlock(&pmap_lock); - rpc_exit(task, -EIO); + pmap_map_free(map); + xprt_put(xprt); +bailout_nofree: + task->tk_status = status; + pmap_wake_portmap_waiters(xprt, status); } #ifdef CONFIG_ROOT_NFS -int -rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) +/** + * rpc_getport_external - obtain the port for a given RPC service on a given host + * @sin: address of remote peer + * @prog: RPC program number to bind + * @vers: RPC version number to bind + * @prot: transport protocol to use to make this request + * + * This one is called from outside the RPC client in a synchronous task context. + */ +int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) { - struct rpc_portmap map = { + struct portmap_args map = { .pm_prog = prog, .pm_vers = vers, .pm_prot = prot, @@ -112,7 +174,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) char hostname[32]; int status; - dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %d, %d, %d)\n", + dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %u, %u, %d)\n", NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); @@ -132,45 +194,53 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) } #endif -static void -pmap_getport_done(struct rpc_task *task) +/* + * Portmapper child task invokes this callback via tk_exit. + */ +static void pmap_getport_done(struct rpc_task *child, void *data) { - struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = task->tk_xprt; - struct rpc_portmap *map = clnt->cl_pmap; - - dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", - task->tk_pid, task->tk_status, clnt->cl_port); - - xprt->ops->set_port(xprt, 0); - if (task->tk_status < 0) { - /* Make the calling task exit with an error */ - task->tk_action = rpc_exit_task; - } else if (clnt->cl_port == 0) { - /* Program not registered */ - rpc_exit(task, -EACCES); + struct portmap_args *map = data; + struct rpc_xprt *xprt = map->pm_xprt; + int status = child->tk_status; + + if (status < 0) { + /* Portmapper not available */ + xprt->ops->set_port(xprt, 0); + } else if (map->pm_port == 0) { + /* Requested RPC service wasn't registered */ + xprt->ops->set_port(xprt, 0); + status = -EACCES; } else { - xprt->ops->set_port(xprt, clnt->cl_port); - clnt->cl_port = htons(clnt->cl_port); + /* Succeeded */ + xprt->ops->set_port(xprt, map->pm_port); + xprt_set_bound(xprt); + status = 0; } - spin_lock(&pmap_lock); - map->pm_binding = 0; - rpc_wake_up(&map->pm_bindwait); - spin_unlock(&pmap_lock); + + dprintk("RPC: %4d pmap_getport_done(status %d, port %u)\n", + child->tk_pid, status, map->pm_port); + + pmap_wake_portmap_waiters(xprt, status); + xprt_put(xprt); } -/* - * Set or unset a port registration with the local portmapper. +/** + * rpc_register - set or unset a port registration with the local portmapper + * @prog: RPC program number to bind + * @vers: RPC version number to bind + * @prot: transport protocol to use to make this request + * @port: port value to register + * @okay: result code + * * port == 0 means unregister, port != 0 means register. */ -int -rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) +int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) { struct sockaddr_in sin = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_LOOPBACK), }; - struct rpc_portmap map = { + struct portmap_args map = { .pm_prog = prog, .pm_vers = vers, .pm_prot = prot, @@ -184,7 +254,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) struct rpc_clnt *pmap_clnt; int error = 0; - dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n", + dprintk("RPC: registering (%u, %u, %d, %u) with portmapper.\n", prog, vers, prot, port); pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); @@ -207,38 +277,32 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) return error; } -static struct rpc_clnt * -pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) +static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) { - struct rpc_xprt *xprt; - struct rpc_clnt *clnt; - - /* printk("pmap: create xprt\n"); */ - xprt = xprt_create_proto(proto, srvaddr, NULL); - if (IS_ERR(xprt)) - return (struct rpc_clnt *)xprt; - xprt->ops->set_port(xprt, RPC_PMAP_PORT); + struct rpc_create_args args = { + .protocol = proto, + .address = (struct sockaddr *)srvaddr, + .addrsize = sizeof(*srvaddr), + .servername = hostname, + .program = &pmap_program, + .version = RPC_PMAP_VERSION, + .authflavor = RPC_AUTH_UNIX, + .flags = (RPC_CLNT_CREATE_ONESHOT | + RPC_CLNT_CREATE_NOPING), + }; + + srvaddr->sin_port = htons(RPC_PMAP_PORT); if (!privileged) - xprt->resvport = 0; - - /* printk("pmap: create clnt\n"); */ - clnt = rpc_new_client(xprt, hostname, - &pmap_program, RPC_PMAP_VERSION, - RPC_AUTH_UNIX); - if (!IS_ERR(clnt)) { - clnt->cl_softrtry = 1; - clnt->cl_oneshot = 1; - } - return clnt; + args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + return rpc_create(&args); } /* * XDR encode/decode functions for PMAP */ -static int -xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map) +static int xdr_encode_mapping(struct rpc_rqst *req, __be32 *p, struct portmap_args *map) { - dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n", + dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n", map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port); *p++ = htonl(map->pm_prog); *p++ = htonl(map->pm_vers); @@ -249,15 +313,13 @@ xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map) return 0; } -static int -xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp) +static int xdr_decode_port(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { *portp = (unsigned short) ntohl(*p++); return 0; } -static int -xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp) +static int xdr_decode_bool(struct rpc_rqst *req, __be32 *p, unsigned int *boolp) { *boolp = (unsigned int) ntohl(*p++); return 0; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 0b1a1ac8a4b..9a0b41a97f9 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -327,10 +327,8 @@ rpc_show_info(struct seq_file *m, void *v) seq_printf(m, "RPC server: %s\n", clnt->cl_server); seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, clnt->cl_prog, clnt->cl_vers); - seq_printf(m, "address: %u.%u.%u.%u\n", - NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr)); - seq_printf(m, "protocol: %s\n", - clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); + seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); return 0; } @@ -490,14 +488,13 @@ rpc_get_inode(struct super_block *sb, int mode) return NULL; inode->i_mode = mode; inode->i_uid = inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch(mode & S_IFMT) { case S_IFDIR: inode->i_fop = &simple_dir_operations; inode->i_op = &simple_dir_inode_operations; - inode->i_nlink++; + inc_nlink(inode); default: break; } @@ -574,7 +571,7 @@ rpc_populate(struct dentry *parent, if (private) rpc_inode_setowner(inode, private); if (S_ISDIR(mode)) - dir->i_nlink++; + inc_nlink(dir); d_add(dentry, inode); } mutex_unlock(&dir->i_mutex); @@ -596,7 +593,7 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry) goto out_err; inode->i_ino = iunique(dir->i_sb, 100); d_instantiate(dentry, inode); - dir->i_nlink++; + inc_nlink(dir); inode_dir_notify(dir, DN_CREATE); return 0; out_err: @@ -623,17 +620,13 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) } static struct dentry * -rpc_lookup_negative(char *path, struct nameidata *nd) +rpc_lookup_create(struct dentry *parent, const char *name, int len) { + struct inode *dir = parent->d_inode; struct dentry *dentry; - struct inode *dir; - int error; - if ((error = rpc_lookup_parent(path, nd)) != 0) - return ERR_PTR(error); - dir = nd->dentry->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len); + dentry = lookup_one_len(name, parent, len); if (IS_ERR(dentry)) goto out_err; if (dentry->d_inode) { @@ -644,7 +637,20 @@ rpc_lookup_negative(char *path, struct nameidata *nd) return dentry; out_err: mutex_unlock(&dir->i_mutex); - rpc_release_path(nd); + return dentry; +} + +static struct dentry * +rpc_lookup_negative(char *path, struct nameidata *nd) +{ + struct dentry *dentry; + int error; + + if ((error = rpc_lookup_parent(path, nd)) != 0) + return ERR_PTR(error); + dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len); + if (IS_ERR(dentry)) + rpc_release_path(nd); return dentry; } @@ -703,18 +709,17 @@ rpc_rmdir(struct dentry *dentry) } struct dentry * -rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) +rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags) { - struct nameidata nd; struct dentry *dentry; struct inode *dir, *inode; struct rpc_inode *rpci; - dentry = rpc_lookup_negative(path, &nd); + dentry = rpc_lookup_create(parent, name, strlen(name)); if (IS_ERR(dentry)) return dentry; - dir = nd.dentry->d_inode; - inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR); + dir = parent->d_inode; + inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); if (!inode) goto err_dput; inode->i_ino = iunique(dir->i_sb, 100); @@ -728,13 +733,13 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) dget(dentry); out: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); return dentry; err_dput: dput(dentry); dentry = ERR_PTR(-ENOMEM); - printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n", - __FILE__, __FUNCTION__, path, -ENOMEM); + printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n", + __FILE__, __FUNCTION__, parent->d_name.name, name, + -ENOMEM); goto out; } @@ -852,7 +857,6 @@ int register_rpc_pipefs(void) void unregister_rpc_pipefs(void) { - if (kmem_cache_destroy(rpc_inode_cachep)) - printk(KERN_WARNING "RPC: unable to free inode cache\n"); + kmem_cache_destroy(rpc_inode_cachep); unregister_filesystem(&rpc_pipe_fs_type); } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 5c3eee76850..a1ab4eed41f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -21,7 +21,6 @@ #include <linux/mutex.h> #include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/xprt.h> #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_SCHED @@ -45,12 +44,6 @@ static void rpciod_killall(void); static void rpc_async_schedule(void *); /* - * RPC tasks that create another task (e.g. for contacting the portmapper) - * will wait on this queue for their child's completion - */ -static RPC_WAITQ(childq, "childq"); - -/* * RPC tasks sit here while waiting for conditions to improve. */ static RPC_WAITQ(delay_queue, "delayq"); @@ -324,16 +317,6 @@ static void rpc_make_runnable(struct rpc_task *task) } /* - * Place a newly initialized task on the workqueue. - */ -static inline void -rpc_schedule_run(struct rpc_task *task) -{ - rpc_set_active(task); - rpc_make_runnable(task); -} - -/* * Prepare for sleeping on a wait queue. * By always appending tasks to the list we ensure FIFO behavior. * NB: An RPC task will only receive interrupt-driven events as long @@ -559,24 +542,20 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) spin_unlock_bh(&queue->lock); } +static void __rpc_atrun(struct rpc_task *task) +{ + rpc_wake_up_task(task); +} + /* * Run a task at a later time */ -static void __rpc_atrun(struct rpc_task *); -void -rpc_delay(struct rpc_task *task, unsigned long delay) +void rpc_delay(struct rpc_task *task, unsigned long delay) { task->tk_timeout = delay; rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); } -static void -__rpc_atrun(struct rpc_task *task) -{ - task->tk_status = 0; - rpc_wake_up_task(task); -} - /* * Helper to call task->tk_ops->rpc_call_prepare */ @@ -933,72 +912,6 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, } EXPORT_SYMBOL(rpc_run_task); -/** - * rpc_find_parent - find the parent of a child task. - * @child: child task - * @parent: parent task - * - * Checks that the parent task is still sleeping on the - * queue 'childq'. If so returns a pointer to the parent. - * Upon failure returns NULL. - * - * Caller must hold childq.lock - */ -static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent) -{ - struct rpc_task *task; - struct list_head *le; - - task_for_each(task, le, &childq.tasks[0]) - if (task == parent) - return parent; - - return NULL; -} - -static void rpc_child_exit(struct rpc_task *child, void *calldata) -{ - struct rpc_task *parent; - - spin_lock_bh(&childq.lock); - if ((parent = rpc_find_parent(child, calldata)) != NULL) { - parent->tk_status = child->tk_status; - __rpc_wake_up_task(parent); - } - spin_unlock_bh(&childq.lock); -} - -static const struct rpc_call_ops rpc_child_ops = { - .rpc_call_done = rpc_child_exit, -}; - -/* - * Note: rpc_new_task releases the client after a failure. - */ -struct rpc_task * -rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent) -{ - struct rpc_task *task; - - task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent); - if (!task) - goto fail; - return task; - -fail: - parent->tk_status = -ENOMEM; - return NULL; -} - -void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func) -{ - spin_lock_bh(&childq.lock); - /* N.B. Is it possible for the child to have already finished? */ - __rpc_sleep_on(&childq, task, func, NULL); - rpc_schedule_run(child); - spin_unlock_bh(&childq.lock); -} - /* * Kill all tasks for the given client. * XXX: kill their descendants as well? @@ -1146,10 +1059,10 @@ rpc_destroy_mempool(void) mempool_destroy(rpc_buffer_mempool); if (rpc_task_mempool) mempool_destroy(rpc_task_mempool); - if (rpc_task_slabp && kmem_cache_destroy(rpc_task_slabp)) - printk(KERN_INFO "rpc_task: not all structures were freed\n"); - if (rpc_buffer_slabp && kmem_cache_destroy(rpc_buffer_slabp)) - printk(KERN_INFO "rpc_buffers: not all structures were freed\n"); + if (rpc_task_slabp) + kmem_cache_destroy(rpc_task_slabp); + if (rpc_buffer_slabp) + kmem_cache_destroy(rpc_buffer_slabp); } int diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index eb330d4f66d..6f17527b9e6 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -168,7 +168,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) return -1; if ((unsigned short)csum_fold(desc.csum)) return -1; - if (unlikely(skb->ip_summed == CHECKSUM_HW)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); return 0; no_checksum: diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index f38f939ce95..192dff5dabc 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -36,8 +36,6 @@ EXPORT_SYMBOL(rpc_wake_up_status); EXPORT_SYMBOL(rpc_release_task); /* RPC client functions */ -EXPORT_SYMBOL(rpc_create_client); -EXPORT_SYMBOL(rpc_new_client); EXPORT_SYMBOL(rpc_clone_client); EXPORT_SYMBOL(rpc_bind_new_program); EXPORT_SYMBOL(rpc_destroy_client); @@ -57,7 +55,6 @@ EXPORT_SYMBOL(rpc_queue_upcall); EXPORT_SYMBOL(rpc_mkpipe); /* Client transport */ -EXPORT_SYMBOL(xprt_create_proto); EXPORT_SYMBOL(xprt_set_timeout); /* Client credential cache */ @@ -73,6 +70,8 @@ EXPORT_SYMBOL(put_rpccred); /* RPC server stuff */ EXPORT_SYMBOL(svc_create); EXPORT_SYMBOL(svc_create_thread); +EXPORT_SYMBOL(svc_create_pooled); +EXPORT_SYMBOL(svc_set_num_threads); EXPORT_SYMBOL(svc_exit_thread); EXPORT_SYMBOL(svc_destroy); EXPORT_SYMBOL(svc_drop); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b76a227dd3a..a99e67b164c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -4,6 +4,10 @@ * High-level RPC service routines * * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + * + * Multiple threads pools and NUMAisation + * Copyright (c) 2006 Silicon Graphics, Inc. + * by Greg Banks <gnb@melbourne.sgi.com> */ #include <linux/linkage.h> @@ -12,6 +16,8 @@ #include <linux/net.h> #include <linux/in.h> #include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/module.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/xdr.h> @@ -23,14 +29,252 @@ #define RPC_PARANOIA 1 /* + * Mode for mapping cpus to pools. + */ +enum { + SVC_POOL_NONE = -1, /* uninitialised, choose one of the others */ + SVC_POOL_GLOBAL, /* no mapping, just a single global pool + * (legacy & UP mode) */ + SVC_POOL_PERCPU, /* one pool per cpu */ + SVC_POOL_PERNODE /* one pool per numa node */ +}; + +/* + * Structure for mapping cpus to pools and vice versa. + * Setup once during sunrpc initialisation. + */ +static struct svc_pool_map { + int mode; /* Note: int not enum to avoid + * warnings about "enumeration value + * not handled in switch" */ + unsigned int npools; + unsigned int *pool_to; /* maps pool id to cpu or node */ + unsigned int *to_pool; /* maps cpu or node to pool id */ +} svc_pool_map = { + .mode = SVC_POOL_NONE +}; + + +/* + * Detect best pool mapping mode heuristically, + * according to the machine's topology. + */ +static int +svc_pool_map_choose_mode(void) +{ + unsigned int node; + + if (num_online_nodes() > 1) { + /* + * Actually have multiple NUMA nodes, + * so split pools on NUMA node boundaries + */ + return SVC_POOL_PERNODE; + } + + node = any_online_node(node_online_map); + if (nr_cpus_node(node) > 2) { + /* + * Non-trivial SMP, or CONFIG_NUMA on + * non-NUMA hardware, e.g. with a generic + * x86_64 kernel on Xeons. In this case we + * want to divide the pools on cpu boundaries. + */ + return SVC_POOL_PERCPU; + } + + /* default: one global pool */ + return SVC_POOL_GLOBAL; +} + +/* + * Allocate the to_pool[] and pool_to[] arrays. + * Returns 0 on success or an errno. + */ +static int +svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools) +{ + m->to_pool = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL); + if (!m->to_pool) + goto fail; + m->pool_to = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL); + if (!m->pool_to) + goto fail_free; + + return 0; + +fail_free: + kfree(m->to_pool); +fail: + return -ENOMEM; +} + +/* + * Initialise the pool map for SVC_POOL_PERCPU mode. + * Returns number of pools or <0 on error. + */ +static int +svc_pool_map_init_percpu(struct svc_pool_map *m) +{ + unsigned int maxpools = highest_possible_processor_id()+1; + unsigned int pidx = 0; + unsigned int cpu; + int err; + + err = svc_pool_map_alloc_arrays(m, maxpools); + if (err) + return err; + + for_each_online_cpu(cpu) { + BUG_ON(pidx > maxpools); + m->to_pool[cpu] = pidx; + m->pool_to[pidx] = cpu; + pidx++; + } + /* cpus brought online later all get mapped to pool0, sorry */ + + return pidx; +}; + + +/* + * Initialise the pool map for SVC_POOL_PERNODE mode. + * Returns number of pools or <0 on error. + */ +static int +svc_pool_map_init_pernode(struct svc_pool_map *m) +{ + unsigned int maxpools = highest_possible_node_id()+1; + unsigned int pidx = 0; + unsigned int node; + int err; + + err = svc_pool_map_alloc_arrays(m, maxpools); + if (err) + return err; + + for_each_node_with_cpus(node) { + /* some architectures (e.g. SN2) have cpuless nodes */ + BUG_ON(pidx > maxpools); + m->to_pool[node] = pidx; + m->pool_to[pidx] = node; + pidx++; + } + /* nodes brought online later all get mapped to pool0, sorry */ + + return pidx; +} + + +/* + * Build the global map of cpus to pools and vice versa. + */ +static unsigned int +svc_pool_map_init(void) +{ + struct svc_pool_map *m = &svc_pool_map; + int npools = -1; + + if (m->mode != SVC_POOL_NONE) + return m->npools; + + m->mode = svc_pool_map_choose_mode(); + + switch (m->mode) { + case SVC_POOL_PERCPU: + npools = svc_pool_map_init_percpu(m); + break; + case SVC_POOL_PERNODE: + npools = svc_pool_map_init_pernode(m); + break; + } + + if (npools < 0) { + /* default, or memory allocation failure */ + npools = 1; + m->mode = SVC_POOL_GLOBAL; + } + m->npools = npools; + + return m->npools; +} + +/* + * Set the current thread's cpus_allowed mask so that it + * will only run on cpus in the given pool. + * + * Returns 1 and fills in oldmask iff a cpumask was applied. + */ +static inline int +svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask) +{ + struct svc_pool_map *m = &svc_pool_map; + unsigned int node; /* or cpu */ + + /* + * The caller checks for sv_nrpools > 1, which + * implies that we've been initialized and the + * map mode is not NONE. + */ + BUG_ON(m->mode == SVC_POOL_NONE); + + switch (m->mode) + { + default: + return 0; + case SVC_POOL_PERCPU: + node = m->pool_to[pidx]; + *oldmask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(node)); + return 1; + case SVC_POOL_PERNODE: + node = m->pool_to[pidx]; + *oldmask = current->cpus_allowed; + set_cpus_allowed(current, node_to_cpumask(node)); + return 1; + } +} + +/* + * Use the mapping mode to choose a pool for a given CPU. + * Used when enqueueing an incoming RPC. Always returns + * a non-NULL pool pointer. + */ +struct svc_pool * +svc_pool_for_cpu(struct svc_serv *serv, int cpu) +{ + struct svc_pool_map *m = &svc_pool_map; + unsigned int pidx = 0; + + /* + * SVC_POOL_NONE happens in a pure client when + * lockd is brought up, so silently treat it the + * same as SVC_POOL_GLOBAL. + */ + + switch (m->mode) { + case SVC_POOL_PERCPU: + pidx = m->to_pool[cpu]; + break; + case SVC_POOL_PERNODE: + pidx = m->to_pool[cpu_to_node(cpu)]; + break; + } + return &serv->sv_pools[pidx % serv->sv_nrpools]; +} + + +/* * Create an RPC service */ -struct svc_serv * -svc_create(struct svc_program *prog, unsigned int bufsize) +static struct svc_serv * +__svc_create(struct svc_program *prog, unsigned int bufsize, int npools, + void (*shutdown)(struct svc_serv *serv)) { struct svc_serv *serv; int vers; unsigned int xdrsize; + unsigned int i; if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) return NULL; @@ -39,6 +283,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize) serv->sv_nrthreads = 1; serv->sv_stats = prog->pg_stats; serv->sv_bufsz = bufsize? bufsize : 4096; + serv->sv_shutdown = shutdown; xdrsize = 0; while (prog) { prog->pg_lovers = prog->pg_nvers-1; @@ -53,20 +298,68 @@ svc_create(struct svc_program *prog, unsigned int bufsize) prog = prog->pg_next; } serv->sv_xdrsize = xdrsize; - INIT_LIST_HEAD(&serv->sv_threads); - INIT_LIST_HEAD(&serv->sv_sockets); INIT_LIST_HEAD(&serv->sv_tempsocks); INIT_LIST_HEAD(&serv->sv_permsocks); + init_timer(&serv->sv_temptimer); spin_lock_init(&serv->sv_lock); + serv->sv_nrpools = npools; + serv->sv_pools = + kcalloc(sizeof(struct svc_pool), serv->sv_nrpools, + GFP_KERNEL); + if (!serv->sv_pools) { + kfree(serv); + return NULL; + } + + for (i = 0; i < serv->sv_nrpools; i++) { + struct svc_pool *pool = &serv->sv_pools[i]; + + dprintk("initialising pool %u for %s\n", + i, serv->sv_name); + + pool->sp_id = i; + INIT_LIST_HEAD(&pool->sp_threads); + INIT_LIST_HEAD(&pool->sp_sockets); + INIT_LIST_HEAD(&pool->sp_all_threads); + spin_lock_init(&pool->sp_lock); + } + + /* Remove any stale portmap registrations */ svc_register(serv, 0, 0); return serv; } +struct svc_serv * +svc_create(struct svc_program *prog, unsigned int bufsize, + void (*shutdown)(struct svc_serv *serv)) +{ + return __svc_create(prog, bufsize, /*npools*/1, shutdown); +} + +struct svc_serv * +svc_create_pooled(struct svc_program *prog, unsigned int bufsize, + void (*shutdown)(struct svc_serv *serv), + svc_thread_fn func, int sig, struct module *mod) +{ + struct svc_serv *serv; + unsigned int npools = svc_pool_map_init(); + + serv = __svc_create(prog, bufsize, npools, shutdown); + + if (serv != NULL) { + serv->sv_function = func; + serv->sv_kill_signal = sig; + serv->sv_module = mod; + } + + return serv; +} + /* - * Destroy an RPC service + * Destroy an RPC service. Should be called with the BKL held */ void svc_destroy(struct svc_serv *serv) @@ -85,12 +378,17 @@ svc_destroy(struct svc_serv *serv) } else printk("svc_destroy: no threads for serv=%p!\n", serv); + del_timer_sync(&serv->sv_temptimer); + while (!list_empty(&serv->sv_tempsocks)) { svsk = list_entry(serv->sv_tempsocks.next, struct svc_sock, sk_list); svc_delete_socket(svsk); } + if (serv->sv_shutdown) + serv->sv_shutdown(serv); + while (!list_empty(&serv->sv_permsocks)) { svsk = list_entry(serv->sv_permsocks.next, struct svc_sock, @@ -102,6 +400,7 @@ svc_destroy(struct svc_serv *serv) /* Unregister service with the portmapper */ svc_register(serv, 0, 0); + kfree(serv->sv_pools); kfree(serv); } @@ -150,13 +449,18 @@ svc_release_buffer(struct svc_rqst *rqstp) } /* - * Create a server thread + * Create a thread in the given pool. Caller must hold BKL. + * On a NUMA or SMP machine, with a multi-pool serv, the thread + * will be restricted to run on the cpus belonging to the pool. */ -int -svc_create_thread(svc_thread_fn func, struct svc_serv *serv) +static int +__svc_create_thread(svc_thread_fn func, struct svc_serv *serv, + struct svc_pool *pool) { struct svc_rqst *rqstp; int error = -ENOMEM; + int have_oldmask = 0; + cpumask_t oldmask; rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); if (!rqstp) @@ -170,8 +474,21 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv) goto out_thread; serv->sv_nrthreads++; + spin_lock_bh(&pool->sp_lock); + pool->sp_nrthreads++; + list_add(&rqstp->rq_all, &pool->sp_all_threads); + spin_unlock_bh(&pool->sp_lock); rqstp->rq_server = serv; + rqstp->rq_pool = pool; + + if (serv->sv_nrpools > 1) + have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask); + error = kernel_thread((int (*)(void *)) func, rqstp, 0); + + if (have_oldmask) + set_cpus_allowed(current, oldmask); + if (error < 0) goto out_thread; svc_sock_update_bufs(serv); @@ -185,17 +502,136 @@ out_thread: } /* - * Destroy an RPC server thread + * Create a thread in the default pool. Caller must hold BKL. + */ +int +svc_create_thread(svc_thread_fn func, struct svc_serv *serv) +{ + return __svc_create_thread(func, serv, &serv->sv_pools[0]); +} + +/* + * Choose a pool in which to create a new thread, for svc_set_num_threads + */ +static inline struct svc_pool * +choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +{ + if (pool != NULL) + return pool; + + return &serv->sv_pools[(*state)++ % serv->sv_nrpools]; +} + +/* + * Choose a thread to kill, for svc_set_num_threads + */ +static inline struct task_struct * +choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +{ + unsigned int i; + struct task_struct *task = NULL; + + if (pool != NULL) { + spin_lock_bh(&pool->sp_lock); + } else { + /* choose a pool in round-robin fashion */ + for (i = 0; i < serv->sv_nrpools; i++) { + pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; + spin_lock_bh(&pool->sp_lock); + if (!list_empty(&pool->sp_all_threads)) + goto found_pool; + spin_unlock_bh(&pool->sp_lock); + } + return NULL; + } + +found_pool: + if (!list_empty(&pool->sp_all_threads)) { + struct svc_rqst *rqstp; + + /* + * Remove from the pool->sp_all_threads list + * so we don't try to kill it again. + */ + rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all); + list_del_init(&rqstp->rq_all); + task = rqstp->rq_task; + } + spin_unlock_bh(&pool->sp_lock); + + return task; +} + +/* + * Create or destroy enough new threads to make the number + * of threads the given number. If `pool' is non-NULL, applies + * only to threads in that pool, otherwise round-robins between + * all pools. Must be called with a svc_get() reference and + * the BKL held. + * + * Destroying threads relies on the service threads filling in + * rqstp->rq_task, which only the nfs ones do. Assumes the serv + * has been created using svc_create_pooled(). + * + * Based on code that used to be in nfsd_svc() but tweaked + * to be pool-aware. + */ +int +svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +{ + struct task_struct *victim; + int error = 0; + unsigned int state = serv->sv_nrthreads-1; + + if (pool == NULL) { + /* The -1 assumes caller has done a svc_get() */ + nrservs -= (serv->sv_nrthreads-1); + } else { + spin_lock_bh(&pool->sp_lock); + nrservs -= pool->sp_nrthreads; + spin_unlock_bh(&pool->sp_lock); + } + + /* create new threads */ + while (nrservs > 0) { + nrservs--; + __module_get(serv->sv_module); + error = __svc_create_thread(serv->sv_function, serv, + choose_pool(serv, pool, &state)); + if (error < 0) { + module_put(serv->sv_module); + break; + } + } + /* destroy old threads */ + while (nrservs < 0 && + (victim = choose_victim(serv, pool, &state)) != NULL) { + send_sig(serv->sv_kill_signal, victim, 1); + nrservs++; + } + + return error; +} + +/* + * Called from a server thread as it's exiting. Caller must hold BKL. */ void svc_exit_thread(struct svc_rqst *rqstp) { struct svc_serv *serv = rqstp->rq_server; + struct svc_pool *pool = rqstp->rq_pool; svc_release_buffer(rqstp); kfree(rqstp->rq_resp); kfree(rqstp->rq_argp); kfree(rqstp->rq_auth_data); + + spin_lock_bh(&pool->sp_lock); + pool->sp_nrthreads--; + list_del(&rqstp->rq_all); + spin_unlock_bh(&pool->sp_lock); + kfree(rqstp); /* Release the server */ @@ -248,19 +684,20 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) * Process the RPC request. */ int -svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) +svc_process(struct svc_rqst *rqstp) { struct svc_program *progp; struct svc_version *versp = NULL; /* compiler food */ struct svc_procedure *procp = NULL; struct kvec * argv = &rqstp->rq_arg.head[0]; struct kvec * resv = &rqstp->rq_res.head[0]; + struct svc_serv *serv = rqstp->rq_server; kxdrproc_t xdr; - u32 *statp; - u32 dir, prog, vers, proc, - auth_stat, rpc_stat; + __be32 *statp; + u32 dir, prog, vers, proc; + __be32 auth_stat, rpc_stat; int auth_res; - u32 *accept_statp; + __be32 *accept_statp; rpc_stat = rpc_success; @@ -284,16 +721,16 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) rqstp->rq_sendfile_ok = 1; /* tcp needs a space for the record length... */ if (rqstp->rq_prot == IPPROTO_TCP) - svc_putu32(resv, 0); + svc_putnl(resv, 0); rqstp->rq_xid = svc_getu32(argv); svc_putu32(resv, rqstp->rq_xid); - dir = ntohl(svc_getu32(argv)); - vers = ntohl(svc_getu32(argv)); + dir = svc_getnl(argv); + vers = svc_getnl(argv); /* First words of reply: */ - svc_putu32(resv, xdr_one); /* REPLY */ + svc_putnl(resv, 1); /* REPLY */ if (dir != 0) /* direction != CALL */ goto err_bad_dir; @@ -303,11 +740,11 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) /* Save position in case we later decide to reject: */ accept_statp = resv->iov_base + resv->iov_len; - svc_putu32(resv, xdr_zero); /* ACCEPT */ + svc_putnl(resv, 0); /* ACCEPT */ - rqstp->rq_prog = prog = ntohl(svc_getu32(argv)); /* program number */ - rqstp->rq_vers = vers = ntohl(svc_getu32(argv)); /* version number */ - rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */ + rqstp->rq_prog = prog = svc_getnl(argv); /* program number */ + rqstp->rq_vers = vers = svc_getnl(argv); /* version number */ + rqstp->rq_proc = proc = svc_getnl(argv); /* procedure number */ progp = serv->sv_program; @@ -361,7 +798,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) /* Build the reply header. */ statp = resv->iov_base +resv->iov_len; - svc_putu32(resv, rpc_success); /* RPC_SUCCESS */ + svc_putnl(resv, RPC_SUCCESS); /* Bump per-procedure stats counter */ procp->pc_count++; @@ -439,10 +876,10 @@ err_bad_dir: err_bad_rpc: serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, xdr_one); /* REJECT */ - svc_putu32(resv, xdr_zero); /* RPC_MISMATCH */ - svc_putu32(resv, xdr_two); /* Only RPCv2 supported */ - svc_putu32(resv, xdr_two); + svc_putnl(resv, 1); /* REJECT */ + svc_putnl(resv, 0); /* RPC_MISMATCH */ + svc_putnl(resv, 2); /* Only RPCv2 supported */ + svc_putnl(resv, 2); goto sendit; err_bad_auth: @@ -450,15 +887,15 @@ err_bad_auth: serv->sv_stats->rpcbadauth++; /* Restore write pointer to location of accept status: */ xdr_ressize_check(rqstp, accept_statp); - svc_putu32(resv, xdr_one); /* REJECT */ - svc_putu32(resv, xdr_one); /* AUTH_ERROR */ - svc_putu32(resv, auth_stat); /* status */ + svc_putnl(resv, 1); /* REJECT */ + svc_putnl(resv, 1); /* AUTH_ERROR */ + svc_putnl(resv, ntohl(auth_stat)); /* status */ goto sendit; err_bad_prog: dprintk("svc: unknown program %d\n", prog); serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, rpc_prog_unavail); + svc_putnl(resv, RPC_PROG_UNAVAIL); goto sendit; err_bad_vers: @@ -466,9 +903,9 @@ err_bad_vers: printk("svc: unknown version (%d)\n", vers); #endif serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, rpc_prog_mismatch); - svc_putu32(resv, htonl(progp->pg_lovers)); - svc_putu32(resv, htonl(progp->pg_hivers)); + svc_putnl(resv, RPC_PROG_MISMATCH); + svc_putnl(resv, progp->pg_lovers); + svc_putnl(resv, progp->pg_hivers); goto sendit; err_bad_proc: @@ -476,7 +913,7 @@ err_bad_proc: printk("svc: unknown procedure (%d)\n", proc); #endif serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, rpc_proc_unavail); + svc_putnl(resv, RPC_PROC_UNAVAIL); goto sendit; err_garbage: @@ -486,6 +923,6 @@ err_garbage: rpc_stat = rpc_garbage_args; err_bad: serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, rpc_stat); + svc_putnl(resv, ntohl(rpc_stat)); goto sendit; } diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 5b28c617680..8f2320aded5 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -35,14 +35,14 @@ static struct auth_ops *authtab[RPC_AUTH_MAXFLAVOR] = { }; int -svc_authenticate(struct svc_rqst *rqstp, u32 *authp) +svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) { rpc_authflavor_t flavor; struct auth_ops *aops; *authp = rpc_auth_ok; - flavor = ntohl(svc_getu32(&rqstp->rq_arg.head[0])); + flavor = svc_getnl(&rqstp->rq_arg.head[0]); dprintk("svc: svc_authenticate (%d)\n", flavor); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 7e5707e2d6b..40d41a2831d 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -145,7 +145,7 @@ static void ip_map_request(struct cache_detail *cd, { char text_addr[20]; struct ip_map *im = container_of(h, struct ip_map, h); - __u32 addr = im->m_addr.s_addr; + __be32 addr = im->m_addr.s_addr; snprintf(text_addr, 20, "%u.%u.%u.%u", ntohl(addr) >> 24 & 0xff, @@ -249,10 +249,10 @@ static int ip_map_show(struct seq_file *m, seq_printf(m, "%s %d.%d.%d.%d %s\n", im->m_class, - htonl(addr.s_addr) >> 24 & 0xff, - htonl(addr.s_addr) >> 16 & 0xff, - htonl(addr.s_addr) >> 8 & 0xff, - htonl(addr.s_addr) >> 0 & 0xff, + ntohl(addr.s_addr) >> 24 & 0xff, + ntohl(addr.s_addr) >> 16 & 0xff, + ntohl(addr.s_addr) >> 8 & 0xff, + ntohl(addr.s_addr) >> 0 & 0xff, dom ); return 0; @@ -348,12 +348,9 @@ int auth_unix_forget_old(struct auth_domain *dom) struct auth_domain *auth_unix_lookup(struct in_addr addr) { - struct ip_map key, *ipm; + struct ip_map *ipm; struct auth_domain *rv; - strcpy(key.m_class, "nfsd"); - key.m_addr = addr; - ipm = ip_map_lookup("nfsd", addr); if (!ipm) @@ -410,7 +407,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) } static int -svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp) +svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -427,7 +424,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp) *authp = rpc_autherr_badcred; return SVC_DENIED; } - if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { + if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { dprintk("svc: bad null verf\n"); *authp = rpc_autherr_badverf; return SVC_DENIED; @@ -441,8 +438,8 @@ svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp) return SVC_DROP; /* kmalloc failure - client must retry */ /* Put NULL verifier */ - svc_putu32(resv, RPC_AUTH_NULL); - svc_putu32(resv, 0); + svc_putnl(resv, RPC_AUTH_NULL); + svc_putnl(resv, 0); return SVC_OK; } @@ -472,7 +469,7 @@ struct auth_ops svcauth_null = { static int -svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp) +svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -488,31 +485,31 @@ svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp) svc_getu32(argv); /* length */ svc_getu32(argv); /* time stamp */ - slen = XDR_QUADLEN(ntohl(svc_getu32(argv))); /* machname length */ + slen = XDR_QUADLEN(svc_getnl(argv)); /* machname length */ if (slen > 64 || (len -= (slen + 3)*4) < 0) goto badcred; - argv->iov_base = (void*)((u32*)argv->iov_base + slen); /* skip machname */ + argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */ argv->iov_len -= slen*4; - cred->cr_uid = ntohl(svc_getu32(argv)); /* uid */ - cred->cr_gid = ntohl(svc_getu32(argv)); /* gid */ - slen = ntohl(svc_getu32(argv)); /* gids length */ + cred->cr_uid = svc_getnl(argv); /* uid */ + cred->cr_gid = svc_getnl(argv); /* gid */ + slen = svc_getnl(argv); /* gids length */ if (slen > 16 || (len -= (slen + 2)*4) < 0) goto badcred; cred->cr_group_info = groups_alloc(slen); if (cred->cr_group_info == NULL) return SVC_DROP; for (i = 0; i < slen; i++) - GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv)); + GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); - if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { + if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { *authp = rpc_autherr_badverf; return SVC_DENIED; } /* Put NULL verifier */ - svc_putu32(resv, RPC_AUTH_NULL); - svc_putu32(resv, 0); + svc_putnl(resv, RPC_AUTH_NULL); + svc_putnl(resv, 0); return SVC_OK; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d9a95732df4..cba85d19522 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -31,6 +31,7 @@ #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/skbuff.h> +#include <linux/file.h> #include <net/sock.h> #include <net/checksum.h> #include <net/ip.h> @@ -45,13 +46,16 @@ /* SMP locking strategy: * - * svc_serv->sv_lock protects most stuff for that service. + * svc_pool->sp_lock protects most of the fields of that pool. + * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. + * when both need to be taken (rare), svc_serv->sv_lock is first. + * BKL protects svc_serv->sv_nrthread. + * svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list + * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply. * * Some flags can be set to certain values at any time * providing that certain rules are followed: * - * SK_BUSY can be set to 0 at any time. - * svc_sock_enqueue must be called afterwards * SK_CONN, SK_DATA, can be set or cleared at any time. * after a set, svc_sock_enqueue must be called. * after a clear, the socket must be read/accepted @@ -73,23 +77,30 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); +/* apparently the "standard" is that clients close + * idle connections after 5 minutes, servers after + * 6 minutes + * http://www.connectathon.org/talks96/nfstcp.pdf + */ +static int svc_conn_age_period = 6*60; + /* - * Queue up an idle server thread. Must have serv->sv_lock held. + * Queue up an idle server thread. Must have pool->sp_lock held. * Note: this is really a stack rather than a queue, so that we only - * use as many different threads as we need, and the rest don't polute + * use as many different threads as we need, and the rest don't pollute * the cache. */ static inline void -svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp) +svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) { - list_add(&rqstp->rq_list, &serv->sv_threads); + list_add(&rqstp->rq_list, &pool->sp_threads); } /* - * Dequeue an nfsd thread. Must have serv->sv_lock held. + * Dequeue an nfsd thread. Must have pool->sp_lock held. */ static inline void -svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp) +svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) { list_del(&rqstp->rq_list); } @@ -140,7 +151,9 @@ static void svc_sock_enqueue(struct svc_sock *svsk) { struct svc_serv *serv = svsk->sk_server; + struct svc_pool *pool; struct svc_rqst *rqstp; + int cpu; if (!(svsk->sk_flags & ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) )) @@ -148,10 +161,14 @@ svc_sock_enqueue(struct svc_sock *svsk) if (test_bit(SK_DEAD, &svsk->sk_flags)) return; - spin_lock_bh(&serv->sv_lock); + cpu = get_cpu(); + pool = svc_pool_for_cpu(svsk->sk_server, cpu); + put_cpu(); + + spin_lock_bh(&pool->sp_lock); - if (!list_empty(&serv->sv_threads) && - !list_empty(&serv->sv_sockets)) + if (!list_empty(&pool->sp_threads) && + !list_empty(&pool->sp_sockets)) printk(KERN_ERR "svc_sock_enqueue: threads and sockets both waiting??\n"); @@ -161,73 +178,79 @@ svc_sock_enqueue(struct svc_sock *svsk) goto out_unlock; } - if (test_bit(SK_BUSY, &svsk->sk_flags)) { - /* Don't enqueue socket while daemon is receiving */ + /* Mark socket as busy. It will remain in this state until the + * server has processed all pending data and put the socket back + * on the idle list. We update SK_BUSY atomically because + * it also guards against trying to enqueue the svc_sock twice. + */ + if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) { + /* Don't enqueue socket while already enqueued */ dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk); goto out_unlock; } + BUG_ON(svsk->sk_pool != NULL); + svsk->sk_pool = pool; set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - if (((svsk->sk_reserved + serv->sv_bufsz)*2 + if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2 > svc_sock_wspace(svsk)) && !test_bit(SK_CLOSE, &svsk->sk_flags) && !test_bit(SK_CONN, &svsk->sk_flags)) { /* Don't enqueue while not enough space for reply */ dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", - svsk->sk_sk, svsk->sk_reserved+serv->sv_bufsz, + svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz, svc_sock_wspace(svsk)); + svsk->sk_pool = NULL; + clear_bit(SK_BUSY, &svsk->sk_flags); goto out_unlock; } clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - /* Mark socket as busy. It will remain in this state until the - * server has processed all pending data and put the socket back - * on the idle list. - */ - set_bit(SK_BUSY, &svsk->sk_flags); - if (!list_empty(&serv->sv_threads)) { - rqstp = list_entry(serv->sv_threads.next, + if (!list_empty(&pool->sp_threads)) { + rqstp = list_entry(pool->sp_threads.next, struct svc_rqst, rq_list); dprintk("svc: socket %p served by daemon %p\n", svsk->sk_sk, rqstp); - svc_serv_dequeue(serv, rqstp); + svc_thread_dequeue(pool, rqstp); if (rqstp->rq_sock) printk(KERN_ERR "svc_sock_enqueue: server %p, rq_sock=%p!\n", rqstp, rqstp->rq_sock); rqstp->rq_sock = svsk; - svsk->sk_inuse++; + atomic_inc(&svsk->sk_inuse); rqstp->rq_reserved = serv->sv_bufsz; - svsk->sk_reserved += rqstp->rq_reserved; + atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); + BUG_ON(svsk->sk_pool != pool); wake_up(&rqstp->rq_wait); } else { dprintk("svc: socket %p put into queue\n", svsk->sk_sk); - list_add_tail(&svsk->sk_ready, &serv->sv_sockets); + list_add_tail(&svsk->sk_ready, &pool->sp_sockets); + BUG_ON(svsk->sk_pool != pool); } out_unlock: - spin_unlock_bh(&serv->sv_lock); + spin_unlock_bh(&pool->sp_lock); } /* - * Dequeue the first socket. Must be called with the serv->sv_lock held. + * Dequeue the first socket. Must be called with the pool->sp_lock held. */ static inline struct svc_sock * -svc_sock_dequeue(struct svc_serv *serv) +svc_sock_dequeue(struct svc_pool *pool) { struct svc_sock *svsk; - if (list_empty(&serv->sv_sockets)) + if (list_empty(&pool->sp_sockets)) return NULL; - svsk = list_entry(serv->sv_sockets.next, + svsk = list_entry(pool->sp_sockets.next, struct svc_sock, sk_ready); list_del_init(&svsk->sk_ready); dprintk("svc: socket %p dequeued, inuse=%d\n", - svsk->sk_sk, svsk->sk_inuse); + svsk->sk_sk, atomic_read(&svsk->sk_inuse)); return svsk; } @@ -241,6 +264,7 @@ svc_sock_dequeue(struct svc_serv *serv) static inline void svc_sock_received(struct svc_sock *svsk) { + svsk->sk_pool = NULL; clear_bit(SK_BUSY, &svsk->sk_flags); svc_sock_enqueue(svsk); } @@ -262,10 +286,8 @@ void svc_reserve(struct svc_rqst *rqstp, int space) if (space < rqstp->rq_reserved) { struct svc_sock *svsk = rqstp->rq_sock; - spin_lock_bh(&svsk->sk_server->sv_lock); - svsk->sk_reserved -= (rqstp->rq_reserved - space); + atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved); rqstp->rq_reserved = space; - spin_unlock_bh(&svsk->sk_server->sv_lock); svc_sock_enqueue(svsk); } @@ -277,17 +299,11 @@ void svc_reserve(struct svc_rqst *rqstp, int space) static inline void svc_sock_put(struct svc_sock *svsk) { - struct svc_serv *serv = svsk->sk_server; - - spin_lock_bh(&serv->sv_lock); - if (!--(svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) { - spin_unlock_bh(&serv->sv_lock); + if (atomic_dec_and_test(&svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) { dprintk("svc: releasing dead socket\n"); sock_release(svsk->sk_sock); kfree(svsk); } - else - spin_unlock_bh(&serv->sv_lock); } static void @@ -321,25 +337,33 @@ svc_sock_release(struct svc_rqst *rqstp) /* * External function to wake up a server waiting for data + * This really only makes sense for services like lockd + * which have exactly one thread anyway. */ void svc_wake_up(struct svc_serv *serv) { struct svc_rqst *rqstp; - - spin_lock_bh(&serv->sv_lock); - if (!list_empty(&serv->sv_threads)) { - rqstp = list_entry(serv->sv_threads.next, - struct svc_rqst, - rq_list); - dprintk("svc: daemon %p woken up.\n", rqstp); - /* - svc_serv_dequeue(serv, rqstp); - rqstp->rq_sock = NULL; - */ - wake_up(&rqstp->rq_wait); + unsigned int i; + struct svc_pool *pool; + + for (i = 0; i < serv->sv_nrpools; i++) { + pool = &serv->sv_pools[i]; + + spin_lock_bh(&pool->sp_lock); + if (!list_empty(&pool->sp_threads)) { + rqstp = list_entry(pool->sp_threads.next, + struct svc_rqst, + rq_list); + dprintk("svc: daemon %p woken up.\n", rqstp); + /* + svc_thread_dequeue(pool, rqstp); + rqstp->rq_sock = NULL; + */ + wake_up(&rqstp->rq_wait); + } + spin_unlock_bh(&pool->sp_lock); } - spin_unlock_bh(&serv->sv_lock); } /* @@ -388,7 +412,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) /* send head */ if (slen == xdr->head[0].iov_len) flags = 0; - len = sock->ops->sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); + len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); if (len != xdr->head[0].iov_len) goto out; slen -= xdr->head[0].iov_len; @@ -400,7 +424,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) while (pglen > 0) { if (slen == size) flags = 0; - result = sock->ops->sendpage(sock, *ppage, base, size, flags); + result = kernel_sendpage(sock, *ppage, base, size, flags); if (result > 0) len += result; if (result != size) @@ -413,7 +437,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) } /* send tail */ if (xdr->tail[0].iov_len) { - result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], + result = kernel_sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1), xdr->tail[0].iov_len, 0); @@ -429,18 +453,60 @@ out: } /* + * Report socket names for nfsdfs + */ +static int one_sock_name(char *buf, struct svc_sock *svsk) +{ + int len; + + switch(svsk->sk_sk->sk_family) { + case AF_INET: + len = sprintf(buf, "ipv4 %s %u.%u.%u.%u %d\n", + svsk->sk_sk->sk_protocol==IPPROTO_UDP? + "udp" : "tcp", + NIPQUAD(inet_sk(svsk->sk_sk)->rcv_saddr), + inet_sk(svsk->sk_sk)->num); + break; + default: + len = sprintf(buf, "*unknown-%d*\n", + svsk->sk_sk->sk_family); + } + return len; +} + +int +svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) +{ + struct svc_sock *svsk, *closesk = NULL; + int len = 0; + + if (!serv) + return 0; + spin_lock(&serv->sv_lock); + list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { + int onelen = one_sock_name(buf+len, svsk); + if (toclose && strcmp(toclose, buf+len) == 0) + closesk = svsk; + else + len += onelen; + } + spin_unlock(&serv->sv_lock); + if (closesk) + svc_delete_socket(closesk); + return len; +} +EXPORT_SYMBOL(svc_sock_names); + +/* * Check input queue length */ static int svc_recv_available(struct svc_sock *svsk) { - mm_segment_t oldfs; struct socket *sock = svsk->sk_sock; int avail, err; - oldfs = get_fs(); set_fs(KERNEL_DS); - err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail); - set_fs(oldfs); + err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail); return (err >= 0)? avail : err; } @@ -472,7 +538,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) * at accept time. FIXME */ alen = sizeof(rqstp->rq_addr); - sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1); + kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen); dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len); @@ -560,7 +626,10 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) /* udp sockets need large rcvbuf as all pending * requests are still in that buffer. sndbuf must * also be large enough that there is enough space - * for one reply per thread. + * for one reply per thread. We count all threads + * rather than threads in a particular pool, which + * provides an upper bound on the number of threads + * which will access the socket. */ svc_sock_setbufsize(svsk->sk_sock, (serv->sv_nrthreads+3) * serv->sv_bufsz, @@ -758,7 +827,6 @@ svc_tcp_accept(struct svc_sock *svsk) struct svc_serv *serv = svsk->sk_server; struct socket *sock = svsk->sk_sock; struct socket *newsock; - const struct proto_ops *ops; struct svc_sock *newsvsk; int err, slen; @@ -766,29 +834,23 @@ svc_tcp_accept(struct svc_sock *svsk) if (!sock) return; - err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock); - if (err) { + clear_bit(SK_CONN, &svsk->sk_flags); + err = kernel_accept(sock, &newsock, O_NONBLOCK); + if (err < 0) { if (err == -ENOMEM) printk(KERN_WARNING "%s: no more sockets!\n", serv->sv_name); - return; - } - - dprintk("svc: tcp_accept %p allocated\n", newsock); - newsock->ops = ops = sock->ops; - - clear_bit(SK_CONN, &svsk->sk_flags); - if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) { - if (err != -EAGAIN && net_ratelimit()) + else if (err != -EAGAIN && net_ratelimit()) printk(KERN_WARNING "%s: accept failed (err %d)!\n", serv->sv_name, -err); - goto failed; /* aborted connection or whatever */ + return; } + set_bit(SK_CONN, &svsk->sk_flags); svc_sock_enqueue(svsk); slen = sizeof(sin); - err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1); + err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen); if (err < 0) { if (net_ratelimit()) printk(KERN_WARNING "%s: peername failed (err %d)!\n", @@ -854,7 +916,7 @@ svc_tcp_accept(struct svc_sock *svsk) struct svc_sock, sk_list); set_bit(SK_CLOSE, &svsk->sk_flags); - svsk->sk_inuse ++; + atomic_inc(&svsk->sk_inuse); } spin_unlock_bh(&serv->sv_lock); @@ -912,6 +974,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) /* sndbuf needs to have room for one request * per thread, otherwise we can stall even when the * network isn't a bottleneck. + * + * We count all threads rather than threads in a + * particular pool, which provides an upper bound + * on the number of threads which will access the socket. + * * rcvbuf just needs to be able to hold a few requests. * Normally they will be removed from the queue * as soon a a complete request arrives. @@ -1040,7 +1107,7 @@ svc_tcp_sendto(struct svc_rqst *rqstp) { struct xdr_buf *xbufp = &rqstp->rq_res; int sent; - u32 reclen; + __be32 reclen; /* Set up the first element of the reply kvec. * Any other kvecs that may be in use have been taken @@ -1127,12 +1194,16 @@ svc_sock_update_bufs(struct svc_serv *serv) } /* - * Receive the next request on any socket. + * Receive the next request on any socket. This code is carefully + * organised not to touch any cachelines in the shared svc_serv + * structure, only cachelines in the local svc_pool. */ int -svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout) +svc_recv(struct svc_rqst *rqstp, long timeout) { struct svc_sock *svsk =NULL; + struct svc_serv *serv = rqstp->rq_server; + struct svc_pool *pool = rqstp->rq_pool; int len; int pages; struct xdr_buf *arg; @@ -1182,32 +1253,15 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout) if (signalled()) return -EINTR; - spin_lock_bh(&serv->sv_lock); - if (!list_empty(&serv->sv_tempsocks)) { - svsk = list_entry(serv->sv_tempsocks.next, - struct svc_sock, sk_list); - /* apparently the "standard" is that clients close - * idle connections after 5 minutes, servers after - * 6 minutes - * http://www.connectathon.org/talks96/nfstcp.pdf - */ - if (get_seconds() - svsk->sk_lastrecv < 6*60 - || test_bit(SK_BUSY, &svsk->sk_flags)) - svsk = NULL; - } - if (svsk) { - set_bit(SK_BUSY, &svsk->sk_flags); - set_bit(SK_CLOSE, &svsk->sk_flags); - rqstp->rq_sock = svsk; - svsk->sk_inuse++; - } else if ((svsk = svc_sock_dequeue(serv)) != NULL) { + spin_lock_bh(&pool->sp_lock); + if ((svsk = svc_sock_dequeue(pool)) != NULL) { rqstp->rq_sock = svsk; - svsk->sk_inuse++; + atomic_inc(&svsk->sk_inuse); rqstp->rq_reserved = serv->sv_bufsz; - svsk->sk_reserved += rqstp->rq_reserved; + atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); } else { /* No data pending. Go to sleep */ - svc_serv_enqueue(serv, rqstp); + svc_thread_enqueue(pool, rqstp); /* * We have to be able to interrupt this wait @@ -1215,26 +1269,26 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout) */ set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&rqstp->rq_wait, &wait); - spin_unlock_bh(&serv->sv_lock); + spin_unlock_bh(&pool->sp_lock); schedule_timeout(timeout); try_to_freeze(); - spin_lock_bh(&serv->sv_lock); + spin_lock_bh(&pool->sp_lock); remove_wait_queue(&rqstp->rq_wait, &wait); if (!(svsk = rqstp->rq_sock)) { - svc_serv_dequeue(serv, rqstp); - spin_unlock_bh(&serv->sv_lock); + svc_thread_dequeue(pool, rqstp); + spin_unlock_bh(&pool->sp_lock); dprintk("svc: server %p, no data yet\n", rqstp); return signalled()? -EINTR : -EAGAIN; } } - spin_unlock_bh(&serv->sv_lock); + spin_unlock_bh(&pool->sp_lock); - dprintk("svc: server %p, socket %p, inuse=%d\n", - rqstp, svsk, svsk->sk_inuse); + dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n", + rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse)); len = svsk->sk_recvfrom(rqstp); dprintk("svc: got len=%d\n", len); @@ -1245,13 +1299,7 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout) return -EAGAIN; } svsk->sk_lastrecv = get_seconds(); - if (test_bit(SK_TEMP, &svsk->sk_flags)) { - /* push active sockets to end of list */ - spin_lock_bh(&serv->sv_lock); - if (!list_empty(&svsk->sk_list)) - list_move_tail(&svsk->sk_list, &serv->sv_tempsocks); - spin_unlock_bh(&serv->sv_lock); - } + clear_bit(SK_OLD, &svsk->sk_flags); rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024; rqstp->rq_chandle.defer = svc_defer; @@ -1311,6 +1359,58 @@ svc_send(struct svc_rqst *rqstp) } /* + * Timer function to close old temporary sockets, using + * a mark-and-sweep algorithm. + */ +static void +svc_age_temp_sockets(unsigned long closure) +{ + struct svc_serv *serv = (struct svc_serv *)closure; + struct svc_sock *svsk; + struct list_head *le, *next; + LIST_HEAD(to_be_aged); + + dprintk("svc_age_temp_sockets\n"); + + if (!spin_trylock_bh(&serv->sv_lock)) { + /* busy, try again 1 sec later */ + dprintk("svc_age_temp_sockets: busy\n"); + mod_timer(&serv->sv_temptimer, jiffies + HZ); + return; + } + + list_for_each_safe(le, next, &serv->sv_tempsocks) { + svsk = list_entry(le, struct svc_sock, sk_list); + + if (!test_and_set_bit(SK_OLD, &svsk->sk_flags)) + continue; + if (atomic_read(&svsk->sk_inuse) || test_bit(SK_BUSY, &svsk->sk_flags)) + continue; + atomic_inc(&svsk->sk_inuse); + list_move(le, &to_be_aged); + set_bit(SK_CLOSE, &svsk->sk_flags); + set_bit(SK_DETACHED, &svsk->sk_flags); + } + spin_unlock_bh(&serv->sv_lock); + + while (!list_empty(&to_be_aged)) { + le = to_be_aged.next; + /* fiddling the sk_list node is safe 'cos we're SK_DETACHED */ + list_del_init(le); + svsk = list_entry(le, struct svc_sock, sk_list); + + dprintk("queuing svsk %p for closing, %lu seconds old\n", + svsk, get_seconds() - svsk->sk_lastrecv); + + /* a thread will dequeue and close it soon */ + svc_sock_enqueue(svsk); + svc_sock_put(svsk); + } + + mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); +} + +/* * Initialize socket for RPC use and create svc_sock struct * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. */ @@ -1347,7 +1447,9 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock, svsk->sk_odata = inet->sk_data_ready; svsk->sk_owspace = inet->sk_write_space; svsk->sk_server = serv; + atomic_set(&svsk->sk_inuse, 0); svsk->sk_lastrecv = get_seconds(); + spin_lock_init(&svsk->sk_defer_lock); INIT_LIST_HEAD(&svsk->sk_deferred); INIT_LIST_HEAD(&svsk->sk_ready); mutex_init(&svsk->sk_mutex); @@ -1363,6 +1465,13 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock, set_bit(SK_TEMP, &svsk->sk_flags); list_add(&svsk->sk_list, &serv->sv_tempsocks); serv->sv_tmpcnt++; + if (serv->sv_temptimer.function == NULL) { + /* setup timer to age temp sockets */ + setup_timer(&serv->sv_temptimer, svc_age_temp_sockets, + (unsigned long)serv); + mod_timer(&serv->sv_temptimer, + jiffies + svc_conn_age_period * HZ); + } } else { clear_bit(SK_TEMP, &svsk->sk_flags); list_add(&svsk->sk_list, &serv->sv_permsocks); @@ -1377,6 +1486,38 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock, return svsk; } +int svc_addsock(struct svc_serv *serv, + int fd, + char *name_return, + int *proto) +{ + int err = 0; + struct socket *so = sockfd_lookup(fd, &err); + struct svc_sock *svsk = NULL; + + if (!so) + return err; + if (so->sk->sk_family != AF_INET) + err = -EAFNOSUPPORT; + else if (so->sk->sk_protocol != IPPROTO_TCP && + so->sk->sk_protocol != IPPROTO_UDP) + err = -EPROTONOSUPPORT; + else if (so->state > SS_UNCONNECTED) + err = -EISCONN; + else { + svsk = svc_setup_socket(serv, so, &err, 1); + if (svsk) + err = 0; + } + if (err) { + sockfd_put(so); + return err; + } + if (proto) *proto = so->sk->sk_protocol; + return one_sock_name(name_return, svsk); +} +EXPORT_SYMBOL_GPL(svc_addsock); + /* * Create socket for RPC service. */ @@ -1403,17 +1544,15 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0) return error; - if (sin != NULL) { - if (type == SOCK_STREAM) - sock->sk->sk_reuse = 1; /* allow address reuse */ - error = sock->ops->bind(sock, (struct sockaddr *) sin, - sizeof(*sin)); - if (error < 0) - goto bummer; - } + if (type == SOCK_STREAM) + sock->sk->sk_reuse = 1; /* allow address reuse */ + error = kernel_bind(sock, (struct sockaddr *) sin, + sizeof(*sin)); + if (error < 0) + goto bummer; if (protocol == IPPROTO_TCP) { - if ((error = sock->ops->listen(sock, 64)) < 0) + if ((error = kernel_listen(sock, 64)) < 0) goto bummer; } @@ -1446,15 +1585,25 @@ svc_delete_socket(struct svc_sock *svsk) spin_lock_bh(&serv->sv_lock); - list_del_init(&svsk->sk_list); - list_del_init(&svsk->sk_ready); + if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags)) + list_del_init(&svsk->sk_list); + /* + * We used to delete the svc_sock from whichever list + * it's sk_ready node was on, but we don't actually + * need to. This is because the only time we're called + * while still attached to a queue, the queue itself + * is about to be destroyed (in svc_destroy). + */ if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) if (test_bit(SK_TEMP, &svsk->sk_flags)) serv->sv_tmpcnt--; - if (!svsk->sk_inuse) { + if (!atomic_read(&svsk->sk_inuse)) { spin_unlock_bh(&serv->sv_lock); - sock_release(svsk->sk_sock); + if (svsk->sk_sock->file) + sockfd_put(svsk->sk_sock); + else + sock_release(svsk->sk_sock); kfree(svsk); } else { spin_unlock_bh(&serv->sv_lock); @@ -1485,7 +1634,6 @@ svc_makesock(struct svc_serv *serv, int protocol, unsigned short port) static void svc_revisit(struct cache_deferred_req *dreq, int too_many) { struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle); - struct svc_serv *serv = dreq->owner; struct svc_sock *svsk; if (too_many) { @@ -1496,9 +1644,9 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many) dprintk("revisit queued\n"); svsk = dr->svsk; dr->svsk = NULL; - spin_lock_bh(&serv->sv_lock); + spin_lock_bh(&svsk->sk_defer_lock); list_add(&dr->handle.recent, &svsk->sk_deferred); - spin_unlock_bh(&serv->sv_lock); + spin_unlock_bh(&svsk->sk_defer_lock); set_bit(SK_DEFERRED, &svsk->sk_flags); svc_sock_enqueue(svsk); svc_sock_put(svsk); @@ -1530,10 +1678,8 @@ svc_defer(struct cache_req *req) dr->argslen = rqstp->rq_arg.len >> 2; memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2); } - spin_lock_bh(&rqstp->rq_server->sv_lock); - rqstp->rq_sock->sk_inuse++; + atomic_inc(&rqstp->rq_sock->sk_inuse); dr->svsk = rqstp->rq_sock; - spin_unlock_bh(&rqstp->rq_server->sv_lock); dr->handle.revisit = svc_revisit; return &dr->handle; @@ -1560,11 +1706,10 @@ static int svc_deferred_recv(struct svc_rqst *rqstp) static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) { struct svc_deferred_req *dr = NULL; - struct svc_serv *serv = svsk->sk_server; if (!test_bit(SK_DEFERRED, &svsk->sk_flags)) return NULL; - spin_lock_bh(&serv->sv_lock); + spin_lock_bh(&svsk->sk_defer_lock); clear_bit(SK_DEFERRED, &svsk->sk_flags); if (!list_empty(&svsk->sk_deferred)) { dr = list_entry(svsk->sk_deferred.next, @@ -1573,6 +1718,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) list_del_init(&dr->handle.recent); set_bit(SK_DEFERRED, &svsk->sk_flags); } - spin_unlock_bh(&serv->sv_lock); + spin_unlock_bh(&svsk->sk_defer_lock); return dr; } diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c index bcbdf6430d5..8142fdb8a93 100644 --- a/net/sunrpc/timer.c +++ b/net/sunrpc/timer.c @@ -19,8 +19,6 @@ #include <linux/unistd.h> #include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/xprt.h> -#include <linux/sunrpc/timer.h> #define RPC_RTO_MAX (60*HZ) #define RPC_RTO_INIT (HZ/5) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 6ac45103a27..9022eb8b37e 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -18,8 +18,8 @@ /* * XDR functions for basic NFS types */ -u32 * -xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj) +__be32 * +xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj) { unsigned int quadlen = XDR_QUADLEN(obj->len); @@ -29,8 +29,8 @@ xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj) return p + XDR_QUADLEN(obj->len); } -u32 * -xdr_decode_netobj(u32 *p, struct xdr_netobj *obj) +__be32 * +xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj) { unsigned int len; @@ -55,7 +55,7 @@ xdr_decode_netobj(u32 *p, struct xdr_netobj *obj) * Returns the updated current XDR buffer position * */ -u32 *xdr_encode_opaque_fixed(u32 *p, const void *ptr, unsigned int nbytes) +__be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int nbytes) { if (likely(nbytes != 0)) { unsigned int quadlen = XDR_QUADLEN(nbytes); @@ -79,21 +79,21 @@ EXPORT_SYMBOL(xdr_encode_opaque_fixed); * * Returns the updated current XDR buffer position */ -u32 *xdr_encode_opaque(u32 *p, const void *ptr, unsigned int nbytes) +__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes) { *p++ = htonl(nbytes); return xdr_encode_opaque_fixed(p, ptr, nbytes); } EXPORT_SYMBOL(xdr_encode_opaque); -u32 * -xdr_encode_string(u32 *p, const char *string) +__be32 * +xdr_encode_string(__be32 *p, const char *string) { return xdr_encode_array(p, string, strlen(string)); } -u32 * -xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen) +__be32 * +xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen) { unsigned int len; @@ -432,7 +432,7 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len) * of the buffer length, and takes care of adjusting the kvec * length for us. */ -void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) +void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) { struct kvec *iov = buf->head; int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; @@ -440,8 +440,8 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) BUG_ON(scratch_len < 0); xdr->buf = buf; xdr->iov = iov; - xdr->p = (uint32_t *)((char *)iov->iov_base + iov->iov_len); - xdr->end = (uint32_t *)((char *)iov->iov_base + scratch_len); + xdr->p = (__be32 *)((char *)iov->iov_base + iov->iov_len); + xdr->end = (__be32 *)((char *)iov->iov_base + scratch_len); BUG_ON(iov->iov_len > scratch_len); if (p != xdr->p && p != NULL) { @@ -465,10 +465,10 @@ EXPORT_SYMBOL(xdr_init_encode); * bytes of data. If so, update the total xdr_buf length, and * adjust the length of the current kvec. */ -uint32_t * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) +__be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) { - uint32_t *p = xdr->p; - uint32_t *q; + __be32 *p = xdr->p; + __be32 *q; /* align nbytes on the next 32-bit boundary */ nbytes += 3; @@ -524,7 +524,7 @@ EXPORT_SYMBOL(xdr_write_pages); * @buf: pointer to XDR buffer from which to decode data * @p: current pointer inside XDR buffer */ -void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) +void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) { struct kvec *iov = buf->head; unsigned int len = iov->iov_len; @@ -534,7 +534,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) xdr->buf = buf; xdr->iov = iov; xdr->p = p; - xdr->end = (uint32_t *)((char *)iov->iov_base + len); + xdr->end = (__be32 *)((char *)iov->iov_base + len); } EXPORT_SYMBOL(xdr_init_decode); @@ -548,10 +548,10 @@ EXPORT_SYMBOL(xdr_init_decode); * If so return the current pointer, then update the current * pointer position. */ -uint32_t * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) +__be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { - uint32_t *p = xdr->p; - uint32_t *q = p + XDR_QUADLEN(nbytes); + __be32 *p = xdr->p; + __be32 *q = p + XDR_QUADLEN(nbytes); if (unlikely(q > xdr->end || q < p)) return NULL; @@ -599,8 +599,8 @@ void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) * Position current pointer at beginning of tail, and * set remaining message length. */ - xdr->p = (uint32_t *)((char *)iov->iov_base + padding); - xdr->end = (uint32_t *)((char *)iov->iov_base + end); + xdr->p = (__be32 *)((char *)iov->iov_base + padding); + xdr->end = (__be32 *)((char *)iov->iov_base + end); } EXPORT_SYMBOL(xdr_read_pages); @@ -624,8 +624,8 @@ void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) */ if (len > PAGE_CACHE_SIZE - xdr->buf->page_base) len = PAGE_CACHE_SIZE - xdr->buf->page_base; - xdr->p = (uint32_t *)(kaddr + xdr->buf->page_base); - xdr->end = (uint32_t *)((char *)xdr->p + len); + xdr->p = (__be32 *)(kaddr + xdr->buf->page_base); + xdr->end = (__be32 *)((char *)xdr->p + len); } EXPORT_SYMBOL(xdr_enter_page); @@ -743,7 +743,7 @@ out: int xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj) { - u32 raw; + __be32 raw; int status; status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); @@ -756,7 +756,7 @@ xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj) int xdr_encode_word(struct xdr_buf *buf, int base, u32 obj) { - u32 raw = htonl(obj); + __be32 raw = htonl(obj); return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e8c2bc4977f..80857470dc1 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -534,7 +534,7 @@ void xprt_connect(struct rpc_task *task) dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? "is" : "is not")); - if (!xprt->addr.sin_port) { + if (!xprt_bound(xprt)) { task->tk_status = -EIO; return; } @@ -585,13 +585,6 @@ static void xprt_connect_status(struct rpc_task *task) task->tk_pid, -task->tk_status, task->tk_client->cl_server); xprt_release_write(xprt, task); task->tk_status = -EIO; - return; - } - - /* if soft mounted, just cause this RPC to fail */ - if (RPC_IS_SOFT(task)) { - xprt_release_write(xprt, task); - task->tk_status = -EIO; } } @@ -601,7 +594,7 @@ static void xprt_connect_status(struct rpc_task *task) * @xid: RPC XID of incoming reply * */ -struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) +struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) { struct list_head *pos; @@ -808,7 +801,7 @@ void xprt_reserve(struct rpc_task *task) spin_unlock(&xprt->reserve_lock); } -static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) +static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) { return xprt->xid++; } @@ -829,6 +822,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_bufsize = 0; req->rq_xid = xprt_alloc_xid(xprt); req->rq_release_snd_buf = NULL; + xprt_reset_majortimeo(req); dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, req, ntohl(req->rq_xid)); } @@ -887,16 +881,32 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i to->to_exponential = 0; } -static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) +/** + * xprt_create_transport - create an RPC transport + * @proto: requested transport protocol + * @ap: remote peer address + * @size: length of address + * @to: timeout parameters + * + */ +struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to) { int result; struct rpc_xprt *xprt; struct rpc_rqst *req; - if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) + if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) { + dprintk("RPC: xprt_create_transport: no memory\n"); return ERR_PTR(-ENOMEM); - - xprt->addr = *ap; + } + if (size <= sizeof(xprt->addr)) { + memcpy(&xprt->addr, ap, size); + xprt->addrlen = size; + } else { + kfree(xprt); + dprintk("RPC: xprt_create_transport: address too large\n"); + return ERR_PTR(-EBADF); + } switch (proto) { case IPPROTO_UDP: @@ -908,14 +918,15 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc default: printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", proto); - result = -EIO; - break; + return ERR_PTR(-EIO); } if (result) { kfree(xprt); + dprintk("RPC: xprt_create_transport: failed, %d\n", result); return ERR_PTR(result); } + kref_init(&xprt->kref); spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); @@ -928,6 +939,7 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc xprt->last_used = jiffies; xprt->cwnd = RPC_INITCWND; + rpc_init_wait_queue(&xprt->binding, "xprt_binding"); rpc_init_wait_queue(&xprt->pending, "xprt_pending"); rpc_init_wait_queue(&xprt->sending, "xprt_sending"); rpc_init_wait_queue(&xprt->resend, "xprt_resend"); @@ -941,41 +953,43 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); - - return xprt; -} -/** - * xprt_create_proto - create an RPC client transport - * @proto: requested transport protocol - * @sap: remote peer's address - * @to: timeout parameters for new transport - * - */ -struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) -{ - struct rpc_xprt *xprt; - - xprt = xprt_setup(proto, sap, to); - if (IS_ERR(xprt)) - dprintk("RPC: xprt_create_proto failed\n"); - else - dprintk("RPC: xprt_create_proto created xprt %p\n", xprt); return xprt; } /** * xprt_destroy - destroy an RPC transport, killing off all requests. - * @xprt: transport to destroy + * @kref: kref for the transport to destroy * */ -int xprt_destroy(struct rpc_xprt *xprt) +static void xprt_destroy(struct kref *kref) { + struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref); + dprintk("RPC: destroying transport %p\n", xprt); xprt->shutdown = 1; del_timer_sync(&xprt->timer); xprt->ops->destroy(xprt); kfree(xprt); +} - return 0; +/** + * xprt_put - release a reference to an RPC transport. + * @xprt: pointer to the transport + * + */ +void xprt_put(struct rpc_xprt *xprt) +{ + kref_put(&xprt->kref, xprt_destroy); +} + +/** + * xprt_get - return a reference to an RPC transport. + * @xprt: pointer to the transport + * + */ +struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) +{ + kref_get(&xprt->kref); + return xprt; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 441bd53f5ec..28100e01922 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -125,6 +125,47 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) } #endif +static void xs_format_peer_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; + char *buf; + + buf = kzalloc(20, GFP_KERNEL); + if (buf) { + snprintf(buf, 20, "%u.%u.%u.%u", + NIPQUAD(addr->sin_addr.s_addr)); + } + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%u", + ntohs(addr->sin_port)); + } + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + if (xprt->prot == IPPROTO_UDP) + xprt->address_strings[RPC_DISPLAY_PROTO] = "udp"; + else + xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp"; + + buf = kzalloc(48, GFP_KERNEL); + if (buf) { + snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), + xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + } + xprt->address_strings[RPC_DISPLAY_ALL] = buf; +} + +static void xs_free_peer_addresses(struct rpc_xprt *xprt) +{ + kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_ALL]); +} + #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) @@ -174,7 +215,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a struct page **ppage = xdr->pages; unsigned int len, pglen = xdr->page_len; int err, ret = 0; - ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); if (unlikely(!sock)) return -ENOTCONN; @@ -207,7 +247,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a base &= ~PAGE_CACHE_MASK; } - sendpage = sock->ops->sendpage ? : sock_no_sendpage; do { int flags = XS_SENDMSG_FLAGS; @@ -220,10 +259,7 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a if (pglen != len || xdr->tail[0].iov_len != 0) flags |= MSG_MORE; - /* Hmm... We might be dealing with highmem pages */ - if (PageHighMem(*ppage)) - sendpage = sock_no_sendpage; - err = sendpage(sock, *ppage, base, len, flags); + err = kernel_sendpage(sock, *ppage, base, len, flags); if (ret == 0) ret = err; else if (err > 0) @@ -300,7 +336,7 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_xtime = jiffies; status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, - sizeof(xprt->addr), xdr, req->rq_bytes_sent); + xprt->addrlen, xdr, req->rq_bytes_sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); @@ -490,6 +526,7 @@ static void xs_destroy(struct rpc_xprt *xprt) xprt_disconnect(xprt); xs_close(xprt); + xs_free_peer_addresses(xprt); kfree(xprt->slot); } @@ -511,7 +548,8 @@ static void xs_udp_data_ready(struct sock *sk, int len) struct rpc_rqst *rovr; struct sk_buff *skb; int err, repsize, copied; - u32 _xid, *xp; + u32 _xid; + __be32 *xp; read_lock(&sk->sk_callback_lock); dprintk("RPC: xs_udp_data_ready...\n"); @@ -965,6 +1003,19 @@ static unsigned short xs_get_random_port(void) } /** + * xs_print_peer_address - format an IPv4 address for printing + * @xprt: generic transport + * @format: flags field indicating which parts of the address to render + */ +static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format) +{ + if (xprt->address_strings[format] != NULL) + return xprt->address_strings[format]; + else + return "unprintable"; +} + +/** * xs_set_port - reset the port number in the remote endpoint address * @xprt: generic transport * @port: new port number @@ -972,8 +1023,11 @@ static unsigned short xs_get_random_port(void) */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { + struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr; + dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - xprt->addr.sin_port = htons(port); + + sap->sin_port = htons(port); } static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) @@ -986,7 +1040,7 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) do { myaddr.sin_port = htons(port); - err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, + err = kernel_bind(sock, (struct sockaddr *) &myaddr, sizeof(myaddr)); if (err == 0) { xprt->port = port; @@ -1016,11 +1070,9 @@ static void xs_udp_connect_worker(void *args) struct socket *sock = xprt->sock; int err, status = -EIO; - if (xprt->shutdown || xprt->addr.sin_port == 0) + if (xprt->shutdown || !xprt_bound(xprt)) goto out; - dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt); - /* Start by resetting any existing state */ xs_close(xprt); @@ -1034,6 +1086,9 @@ static void xs_udp_connect_worker(void *args) goto out; } + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + if (!xprt->inet) { struct sock *sk = sock->sk; @@ -1081,7 +1136,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) */ memset(&any, 0, sizeof(any)); any.sa_family = AF_UNSPEC; - result = sock->ops->connect(sock, &any, sizeof(any), 0); + result = kernel_connect(sock, &any, sizeof(any), 0); if (result) dprintk("RPC: AF_UNSPEC connect return code %d\n", result); @@ -1099,11 +1154,9 @@ static void xs_tcp_connect_worker(void *args) struct socket *sock = xprt->sock; int err, status = -EIO; - if (xprt->shutdown || xprt->addr.sin_port == 0) + if (xprt->shutdown || !xprt_bound(xprt)) goto out; - dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt); - if (!xprt->sock) { /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { @@ -1119,6 +1172,9 @@ static void xs_tcp_connect_worker(void *args) /* "close" the socket, preserving the local port */ xs_tcp_reuse_connection(xprt); + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + if (!xprt->inet) { struct sock *sk = sock->sk; @@ -1151,8 +1207,8 @@ static void xs_tcp_connect_worker(void *args) /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; - status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, - sizeof(xprt->addr), O_NONBLOCK); + status = kernel_connect(sock, (struct sockaddr *) &xprt->addr, + xprt->addrlen, O_NONBLOCK); dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); if (status < 0) { @@ -1260,8 +1316,10 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, + .print_addr = xs_print_peer_address, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, + .rpcbind = rpc_getport, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, @@ -1276,8 +1334,10 @@ static struct rpc_xprt_ops xs_udp_ops = { }; static struct rpc_xprt_ops xs_tcp_ops = { + .print_addr = xs_print_peer_address, .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, + .rpcbind = rpc_getport, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, @@ -1298,8 +1358,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; - - dprintk("RPC: setting up udp-ipv4 transport...\n"); + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; xprt->max_reqs = xprt_udp_slot_table_entries; slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); @@ -1307,10 +1366,12 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) if (xprt->slot == NULL) return -ENOMEM; - xprt->prot = IPPROTO_UDP; + if (ntohs(addr->sin_port != 0)) + xprt_set_bound(xprt); xprt->port = xs_get_random_port(); + + xprt->prot = IPPROTO_UDP; xprt->tsh_size = 0; - xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); @@ -1327,6 +1388,10 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) else xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); + xs_format_peer_addresses(xprt); + dprintk("RPC: set up transport to address %s\n", + xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + return 0; } @@ -1339,8 +1404,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; - - dprintk("RPC: setting up tcp-ipv4 transport...\n"); + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; xprt->max_reqs = xprt_tcp_slot_table_entries; slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); @@ -1348,10 +1412,12 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) if (xprt->slot == NULL) return -ENOMEM; - xprt->prot = IPPROTO_TCP; + if (ntohs(addr->sin_port) != 0) + xprt_set_bound(xprt); xprt->port = xs_get_random_port(); + + xprt->prot = IPPROTO_TCP; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); - xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); @@ -1367,5 +1433,9 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) else xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); + xs_format_peer_addresses(xprt); + dprintk("RPC: set up transport to address %s\n", + xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + return 0; } |