Diffstat (limited to 'net')
75 files changed, 2845 insertions, 3723 deletions
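The bulk of this change removes the per-message constructors in net/9p/conv.c and the fcprint.c pretty-printer, replacing them with a format-string marshalling layer (net/9p/protocol.c) and a tag-indexed request table in net/9p/client.c. As a rough illustration of the tag bookkeeping done by p9_tag_alloc()/p9_tag_lookup() in the diff below, the following userspace sketch maps a 16-bit tag onto a row/column slot in a growable two-level array; the ROW_MAXTAG value, the malloc-based allocation and the absence of locking are simplifications for illustration, not part of the patch.

/*
 * Illustrative userspace sketch of the two-level tag table used by
 * p9_tag_alloc()/p9_tag_lookup() in the new net/9p/client.c.  The row
 * size and plain calloc() allocation are stand-ins; the kernel code
 * uses P9_ROW_MAXTAG, kcalloc() and the client spinlock.
 */
#include <stdio.h>
#include <stdlib.h>

#define ROW_MAXTAG 256          /* stand-in for P9_ROW_MAXTAG */
#define MAX_ROWS   256          /* covers the 16-bit tag space */

struct req_slot {
	int in_use;             /* stand-in for the REQ_STATUS_* states */
};

static struct req_slot *rows[MAX_ROWS];
static unsigned int max_tag;    /* grows in ROW_MAXTAG steps */

/* Grow the table until 'tag' has a slot, then return that slot. */
static struct req_slot *tag_alloc(unsigned int tag)
{
	while (tag >= max_tag) {
		unsigned int row = max_tag / ROW_MAXTAG;

		rows[row] = calloc(ROW_MAXTAG, sizeof(struct req_slot));
		if (!rows[row])
			return NULL;
		max_tag += ROW_MAXTAG;
	}
	return &rows[tag / ROW_MAXTAG][tag % ROW_MAXTAG];
}

int main(void)
{
	struct req_slot *r = tag_alloc(300);   /* lands in row 1, col 44 */

	if (r)
		r->in_use = 1;
	printf("table now covers %u tags\n", max_tag);
	return 0;
}

The kernel version additionally offsets the tag by one, allocates per-slot request/response fcall buffers and a wait queue, and serializes growth under the client spinlock, as the hunks below show.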
diff --git a/net/802/fc.c b/net/802/fc.c index cb3475ea6fd..34cf1ee014b 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -82,13 +82,13 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev, static int fc_rebuild_header(struct sk_buff *skb) { +#ifdef CONFIG_INET struct fch_hdr *fch=(struct fch_hdr *)skb->data; struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr)); if(fcllc->ethertype != htons(ETH_P_IP)) { printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype)); return 0; } -#ifdef CONFIG_INET return arp_find(fch->daddr, skb); #else return 0; diff --git a/net/9p/Makefile b/net/9p/Makefile index 519219480db..1041b7bd12e 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -4,10 +4,9 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o 9pnet-objs := \ mod.o \ client.o \ - conv.o \ error.o \ - fcprint.o \ util.o \ + protocol.o \ trans_fd.o \ 9pnet_virtio-objs := \ diff --git a/net/9p/client.c b/net/9p/client.c index e053e06028a..bbac2f72b4d 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -33,12 +33,9 @@ #include <linux/uaccess.h> #include <net/9p/9p.h> #include <linux/parser.h> -#include <net/9p/transport.h> #include <net/9p/client.h> - -static struct p9_fid *p9_fid_create(struct p9_client *clnt); -static void p9_fid_destroy(struct p9_fid *fid); -static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu); +#include <net/9p/transport.h> +#include "protocol.h" /* * Client Option Parsing (code inspired by NFS code) @@ -59,6 +56,9 @@ static const match_table_t tokens = { {Opt_err, NULL}, }; +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); + /** * v9fs_parse_options - parse mount options into session structure * @options: options string passed from mount @@ -124,31 +124,585 @@ static int parse_opts(char *opts, struct p9_client *clnt) return ret; } +/** + * p9_tag_alloc - lookup/allocate a request by tag + * @c: client session to lookup tag within + * @tag: numeric id for transaction + * + * this is a simple array lookup, but will grow the + * request_slots as necessary to accomodate transaction + * ids which did not previously have a slot. + * + * this code relies on the client spinlock to manage locks, its + * possible we should switch to something else, but I'd rather + * stick with something low-overhead for the common case. 
+ * + */ + +static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) +{ + unsigned long flags; + int row, col; + struct p9_req_t *req; + + /* This looks up the original request by tag so we know which + * buffer to read the data into */ + tag++; + + if (tag >= c->max_tag) { + spin_lock_irqsave(&c->lock, flags); + /* check again since original check was outside of lock */ + while (tag >= c->max_tag) { + row = (tag / P9_ROW_MAXTAG); + c->reqs[row] = kcalloc(P9_ROW_MAXTAG, + sizeof(struct p9_req_t), GFP_ATOMIC); + + if (!c->reqs[row]) { + printk(KERN_ERR "Couldn't grow tag array\n"); + return ERR_PTR(-ENOMEM); + } + for (col = 0; col < P9_ROW_MAXTAG; col++) { + c->reqs[row][col].status = REQ_STATUS_IDLE; + c->reqs[row][col].tc = NULL; + } + c->max_tag += P9_ROW_MAXTAG; + } + spin_unlock_irqrestore(&c->lock, flags); + } + row = tag / P9_ROW_MAXTAG; + col = tag % P9_ROW_MAXTAG; + + req = &c->reqs[row][col]; + if (!req->tc) { + req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); + if (!req->wq) { + printk(KERN_ERR "Couldn't grow tag array\n"); + return ERR_PTR(-ENOMEM); + } + init_waitqueue_head(req->wq); + req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + if ((!req->tc) || (!req->rc)) { + printk(KERN_ERR "Couldn't grow tag array\n"); + kfree(req->tc); + kfree(req->rc); + return ERR_PTR(-ENOMEM); + } + req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); + req->tc->capacity = c->msize; + req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); + req->rc->capacity = c->msize; + } + + p9pdu_reset(req->tc); + p9pdu_reset(req->rc); + + req->flush_tag = 0; + req->tc->tag = tag-1; + req->status = REQ_STATUS_ALLOC; + + return &c->reqs[row][col]; +} + +/** + * p9_tag_lookup - lookup a request by tag + * @c: client session to lookup tag within + * @tag: numeric id for transaction + * + */ + +struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag) +{ + int row, col; + + /* This looks up the original request by tag so we know which + * buffer to read the data into */ + tag++; + + BUG_ON(tag >= c->max_tag); + + row = tag / P9_ROW_MAXTAG; + col = tag % P9_ROW_MAXTAG; + + return &c->reqs[row][col]; +} +EXPORT_SYMBOL(p9_tag_lookup); + +/** + * p9_tag_init - setup tags structure and contents + * @tags: tags structure from the client struct + * + * This initializes the tags structure for each client instance. + * + */ + +static int p9_tag_init(struct p9_client *c) +{ + int err = 0; + + c->tagpool = p9_idpool_create(); + if (IS_ERR(c->tagpool)) { + err = PTR_ERR(c->tagpool); + c->tagpool = NULL; + goto error; + } + + p9_idpool_get(c->tagpool); /* reserve tag 0 */ + + c->max_tag = 0; +error: + return err; +} /** - * p9_client_rpc - sends 9P request and waits until a response is available. - * The function can be interrupted. 
- * @c: client data - * @tc: request to be sent - * @rc: pointer where a pointer to the response is stored + * p9_tag_cleanup - cleans up tags structure and reclaims resources + * @tags: tags structure from the client struct + * + * This frees resources associated with the tags structure + * */ +static void p9_tag_cleanup(struct p9_client *c) +{ + int row, col; + + /* check to insure all requests are idle */ + for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { + for (col = 0; col < P9_ROW_MAXTAG; col++) { + if (c->reqs[row][col].status != REQ_STATUS_IDLE) { + P9_DPRINTK(P9_DEBUG_MUX, + "Attempting to cleanup non-free tag %d,%d\n", + row, col); + /* TODO: delay execution of cleanup */ + return; + } + } + } + + if (c->tagpool) + p9_idpool_destroy(c->tagpool); + + /* free requests associated with tags */ + for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { + for (col = 0; col < P9_ROW_MAXTAG; col++) { + kfree(c->reqs[row][col].wq); + kfree(c->reqs[row][col].tc); + kfree(c->reqs[row][col].rc); + } + kfree(c->reqs[row]); + } + c->max_tag = 0; +} + +/** + * p9_free_req - free a request and clean-up as necessary + * c: client state + * r: request to release + * + */ + +static void p9_free_req(struct p9_client *c, struct p9_req_t *r) +{ + int tag = r->tc->tag; + P9_DPRINTK(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); + + r->status = REQ_STATUS_IDLE; + if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool)) + p9_idpool_put(tag, c->tagpool); + + /* if this was a flush request we have to free response fcall */ + if (r->rc->id == P9_RFLUSH) { + kfree(r->tc); + kfree(r->rc); + } +} + +/** + * p9_client_cb - call back from transport to client + * c: client state + * req: request received + * + */ +void p9_client_cb(struct p9_client *c, struct p9_req_t *req) +{ + struct p9_req_t *other_req; + unsigned long flags; + + P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); + + if (req->status == REQ_STATUS_ERROR) + wake_up(req->wq); + + if (req->flush_tag) { /* flush receive path */ + P9_DPRINTK(P9_DEBUG_9P, "<<< RFLUSH %d\n", req->tc->tag); + spin_lock_irqsave(&c->lock, flags); + other_req = p9_tag_lookup(c, req->flush_tag); + if (other_req->status != REQ_STATUS_FLSH) /* stale flush */ + spin_unlock_irqrestore(&c->lock, flags); + else { + other_req->status = REQ_STATUS_FLSHD; + spin_unlock_irqrestore(&c->lock, flags); + wake_up(other_req->wq); + } + p9_free_req(c, req); + } else { /* normal receive path */ + P9_DPRINTK(P9_DEBUG_MUX, "normal: tag %d\n", req->tc->tag); + spin_lock_irqsave(&c->lock, flags); + if (req->status != REQ_STATUS_FLSHD) + req->status = REQ_STATUS_RCVD; + spin_unlock_irqrestore(&c->lock, flags); + wake_up(req->wq); + P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); + } +} +EXPORT_SYMBOL(p9_client_cb); + +/** + * p9_parse_header - parse header arguments out of a packet + * @pdu: packet to parse + * @size: size of packet + * @type: type of request + * @tag: tag of packet + * @rewind: set if we need to rewind offset afterwards + */ + int -p9_client_rpc(struct p9_client *c, struct p9_fcall *tc, - struct p9_fcall **rc) +p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag, + int rewind) { - return c->trans->rpc(c->trans, tc, rc); + int8_t r_type; + int16_t r_tag; + int32_t r_size; + int offset = pdu->offset; + int err; + + pdu->offset = 0; + if (pdu->size == 0) + pdu->size = 7; + + err = p9pdu_readf(pdu, 0, "dbw", &r_size, &r_type, &r_tag); + if (err) + goto rewind_and_exit; + + pdu->size = r_size; + pdu->id = r_type; + pdu->tag = 
r_tag; + + P9_DPRINTK(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", pdu->size, + pdu->id, pdu->tag); + + if (type) + *type = r_type; + if (tag) + *tag = r_tag; + if (size) + *size = r_size; + + +rewind_and_exit: + if (rewind) + pdu->offset = offset; + return err; } +EXPORT_SYMBOL(p9_parse_header); + +/** + * p9_check_errors - check 9p packet for error return and process it + * @c: current client instance + * @req: request to parse and check for error conditions + * + * returns error code if one is discovered, otherwise returns 0 + * + * this will have to be more complicated if we have multiple + * error packet types + */ + +static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) +{ + int8_t type; + int err; + + err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + if (err) { + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + return err; + } + + if (type == P9_RERROR) { + int ecode; + char *ename; + + err = p9pdu_readf(req->rc, c->dotu, "s?d", &ename, &ecode); + if (err) { + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", + err); + return err; + } + + if (c->dotu) + err = -ecode; + + if (!err) { + err = p9_errstr2errno(ename, strlen(ename)); + + /* string match failed */ + if (!err) + err = -ESERVERFAULT; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); + + kfree(ename); + } else + err = 0; + + return err; +} + +/** + * p9_client_flush - flush (cancel) a request + * c: client state + * req: request to cancel + * + * This sents a flush for a particular requests and links + * the flush request to the original request. The current + * code only supports a single flush request although the protocol + * allows for multiple flush requests to be sent for a single request. + * + */ + +static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) +{ + struct p9_req_t *req; + int16_t oldtag; + int err; + + err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1); + if (err) + return err; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag); + + req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag); + if (IS_ERR(req)) + return PTR_ERR(req); + + req->flush_tag = oldtag; + + /* we don't free anything here because RPC isn't complete */ + return 0; +} + +/** + * p9_client_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ + +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) 
+{ + va_list ap; + int tag, err; + struct p9_req_t *req; + unsigned long flags; + int sigpending; + int flushed = 0; + + P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); + + if (c->status != Connected) + return ERR_PTR(-EIO); + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; + + tag = P9_NOTAG; + if (type != P9_TVERSION) { + tag = p9_idpool_get(c->tagpool); + if (tag < 0) + return ERR_PTR(-ENOMEM); + } + + req = p9_tag_alloc(c, tag); + if (IS_ERR(req)) + return req; + + /* marshall the data */ + p9pdu_prepare(req->tc, tag, type); + va_start(ap, fmt); + err = p9pdu_vwritef(req->tc, c->dotu, fmt, ap); + va_end(ap); + p9pdu_finalize(req->tc); + + err = c->trans_mod->request(c, req); + if (err < 0) { + c->status = Disconnected; + goto reterr; + } + + /* if it was a flush we just transmitted, return our tag */ + if (type == P9_TFLUSH) + return req; +again: + P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); + err = wait_event_interruptible(*req->wq, + req->status >= REQ_STATUS_RCVD); + P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d (flushed=%d)\n", + req->wq, tag, err, flushed); + + if (req->status == REQ_STATUS_ERROR) { + P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + err = req->t_err; + } else if (err == -ERESTARTSYS && flushed) { + P9_DPRINTK(P9_DEBUG_MUX, "flushed - going again\n"); + goto again; + } else if (req->status == REQ_STATUS_FLSHD) { + P9_DPRINTK(P9_DEBUG_MUX, "flushed - erestartsys\n"); + err = -ERESTARTSYS; + } + + if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) { + P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); + spin_lock_irqsave(&c->lock, flags); + if (req->status == REQ_STATUS_SENT) + req->status = REQ_STATUS_FLSH; + spin_unlock_irqrestore(&c->lock, flags); + sigpending = 1; + flushed = 1; + clear_thread_flag(TIF_SIGPENDING); + + if (c->trans_mod->cancel(c, req)) { + err = p9_client_flush(c, req); + if (err == 0) + goto again; + } + } + + if (sigpending) { + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + } + + if (err < 0) + goto reterr; + + err = p9_check_errors(c, req); + if (!err) { + P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); + return req; + } + +reterr: + P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, + err); + p9_free_req(c, req); + return ERR_PTR(err); +} + +static struct p9_fid *p9_fid_create(struct p9_client *clnt) +{ + int err; + struct p9_fid *fid; + + P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt); + fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); + if (!fid) + return ERR_PTR(-ENOMEM); + + fid->fid = p9_idpool_get(clnt->fidpool); + if (fid->fid < 0) { + err = -ENOSPC; + goto error; + } + + memset(&fid->qid, 0, sizeof(struct p9_qid)); + fid->mode = -1; + fid->rdir_fpos = 0; + fid->uid = current->fsuid; + fid->clnt = clnt; + fid->aux = NULL; + + spin_lock(&clnt->lock); + list_add(&fid->flist, &clnt->fidlist); + spin_unlock(&clnt->lock); + + return fid; + +error: + kfree(fid); + return ERR_PTR(err); +} + +static void p9_fid_destroy(struct p9_fid *fid) +{ + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid); + clnt = fid->clnt; + p9_idpool_put(fid->fid, clnt->fidpool); + spin_lock(&clnt->lock); + list_del(&fid->flist); + spin_unlock(&clnt->lock); + kfree(fid); +} + +int p9_client_version(struct p9_client *c) +{ + int err = 0; + struct p9_req_t *req; + char *version; + int msize; + + 
P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d extended %d\n", + c->msize, c->dotu); + req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize, + c->dotu ? "9P2000.u" : "9P2000"); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, c->dotu, "ds", &msize, &version); + if (err) { + P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); + p9pdu_dump(1, req->rc); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); + if (!memcmp(version, "9P2000.u", 8)) + c->dotu = 1; + else if (!memcmp(version, "9P2000", 6)) + c->dotu = 0; + else { + err = -EREMOTEIO; + goto error; + } + + if (msize < c->msize) + c->msize = msize; + +error: + kfree(version); + p9_free_req(c, req); + + return err; +} +EXPORT_SYMBOL(p9_client_version); struct p9_client *p9_client_create(const char *dev_name, char *options) { - int err, n; + int err; struct p9_client *clnt; - struct p9_fcall *tc, *rc; - struct p9_str *version; err = 0; - tc = NULL; - rc = NULL; clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL); if (!clnt) return ERR_PTR(-ENOMEM); @@ -164,6 +718,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) goto error; } + p9_tag_init(clnt); + err = parse_opts(options, clnt); if (err < 0) goto error; @@ -175,53 +731,23 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "clnt %p trans %p msize %d dotu %d\n", + P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n", clnt, clnt->trans_mod, clnt->msize, clnt->dotu); - - clnt->trans = clnt->trans_mod->create(dev_name, options, clnt->msize, - clnt->dotu); - if (IS_ERR(clnt->trans)) { - err = PTR_ERR(clnt->trans); - clnt->trans = NULL; + err = clnt->trans_mod->create(clnt, dev_name, options); + if (err) goto error; - } if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize) clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ; - tc = p9_create_tversion(clnt->msize, clnt->dotu?"9P2000.u":"9P2000"); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } - - err = p9_client_rpc(clnt, tc, &rc); + err = p9_client_version(clnt); if (err) goto error; - version = &rc->params.rversion.version; - if (version->len == 8 && !memcmp(version->str, "9P2000.u", 8)) - clnt->dotu = 1; - else if (version->len == 6 && !memcmp(version->str, "9P2000", 6)) - clnt->dotu = 0; - else { - err = -EREMOTEIO; - goto error; - } - - n = rc->params.rversion.msize; - if (n < clnt->msize) - clnt->msize = n; - - kfree(tc); - kfree(rc); return clnt; error: - kfree(tc); - kfree(rc); p9_client_destroy(clnt); return ERR_PTR(err); } @@ -231,13 +757,10 @@ void p9_client_destroy(struct p9_client *clnt) { struct p9_fid *fid, *fidptr; - P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + P9_DPRINTK(P9_DEBUG_MUX, "clnt %p\n", clnt); - if (clnt->trans) { - clnt->trans->close(clnt->trans); - kfree(clnt->trans); - clnt->trans = NULL; - } + if (clnt->trans_mod) + clnt->trans_mod->close(clnt); v9fs_put_trans(clnt->trans_mod); @@ -247,6 +770,8 @@ void p9_client_destroy(struct p9_client *clnt) if (clnt->fidpool) p9_idpool_destroy(clnt->fidpool); + p9_tag_cleanup(clnt); + kfree(clnt); } EXPORT_SYMBOL(p9_client_destroy); @@ -254,7 +779,7 @@ EXPORT_SYMBOL(p9_client_destroy); void p9_client_disconnect(struct p9_client *clnt) { P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); - clnt->trans->status = Disconnected; + clnt->status = Disconnected; } EXPORT_SYMBOL(p9_client_disconnect); @@ -262,14 +787,13 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, char 
*uname, u32 n_uname, char *aname) { int err; - struct p9_fcall *tc, *rc; + struct p9_req_t *req; struct p9_fid *fid; + struct p9_qid qid; - P9_DPRINTK(P9_DEBUG_9P, "clnt %p afid %d uname %s aname %s\n", - clnt, afid?afid->fid:-1, uname, aname); + P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", + afid ? afid->fid : -1, uname, aname); err = 0; - tc = NULL; - rc = NULL; fid = p9_fid_create(clnt); if (IS_ERR(fid)) { @@ -278,73 +802,77 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - tc = p9_create_tattach(fid->fid, afid?afid->fid:P9_NOFID, uname, aname, - n_uname, clnt->dotu); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; + req = p9_client_rpc(clnt, P9_TATTACH, "ddss?d", fid->fid, + afid ? afid->fid : P9_NOFID, uname, aname, n_uname); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) + err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", + qid.type, qid.path, qid.version); - memmove(&fid->qid, &rc->params.rattach.qid, sizeof(struct p9_qid)); - kfree(tc); - kfree(rc); + memmove(&fid->qid, &qid, sizeof(struct p9_qid)); + + p9_free_req(clnt, req); return fid; error: - kfree(tc); - kfree(rc); if (fid) p9_fid_destroy(fid); return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_attach); -struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, - u32 n_uname, char *aname) +struct p9_fid * +p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) { int err; - struct p9_fcall *tc, *rc; - struct p9_fid *fid; + struct p9_req_t *req; + struct p9_qid qid; + struct p9_fid *afid; - P9_DPRINTK(P9_DEBUG_9P, "clnt %p uname %s aname %s\n", clnt, uname, - aname); + P9_DPRINTK(P9_DEBUG_9P, ">>> TAUTH uname %s aname %s\n", uname, aname); err = 0; - tc = NULL; - rc = NULL; - fid = p9_fid_create(clnt); - if (IS_ERR(fid)) { - err = PTR_ERR(fid); - fid = NULL; + afid = p9_fid_create(clnt); + if (IS_ERR(afid)) { + err = PTR_ERR(afid); + afid = NULL; goto error; } - tc = p9_create_tauth(fid->fid, uname, aname, n_uname, clnt->dotu); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; + req = p9_client_rpc(clnt, P9_TAUTH, "dss?d", + afid ? 
afid->fid : P9_NOFID, uname, aname, n_uname); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) + err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); goto error; + } - memmove(&fid->qid, &rc->params.rauth.qid, sizeof(struct p9_qid)); - kfree(tc); - kfree(rc); - return fid; + P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n", + qid.type, qid.path, qid.version); + + memmove(&afid->qid, &qid, sizeof(struct p9_qid)); + p9_free_req(clnt, req); + return afid; error: - kfree(tc); - kfree(rc); - if (fid) - p9_fid_destroy(fid); + if (afid) + p9_fid_destroy(afid); return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_auth); @@ -353,15 +881,13 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, int clone) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; struct p9_fid *fid; + struct p9_qid *wqids; + struct p9_req_t *req; + int16_t nwqids, count; - P9_DPRINTK(P9_DEBUG_9P, "fid %d nwname %d wname[0] %s\n", - oldfid->fid, nwname, wnames?wnames[0]:NULL); err = 0; - tc = NULL; - rc = NULL; clnt = oldfid->clnt; if (clone) { fid = p9_fid_create(clnt); @@ -375,53 +901,49 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, } else fid = oldfid; - tc = p9_create_twalk(oldfid->fid, fid->fid, nwname, wnames); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n", + oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); + + req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, + nwname, wnames); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } - err = p9_client_rpc(clnt, tc, &rc); + err = p9pdu_readf(req->rc, clnt->dotu, "R", &nwqids, &wqids); if (err) { - if (rc && rc->id == P9_RWALK) - goto clunk_fid; - else - goto error; + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); + goto clunk_fid; } + p9_free_req(clnt, req); - if (rc->params.rwalk.nwqid != nwname) { + P9_DPRINTK(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids); + + if (nwqids != nwname) { err = -ENOENT; goto clunk_fid; } + for (count = 0; count < nwqids; count++) + P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", + count, wqids[count].type, wqids[count].path, + wqids[count].version); + if (nwname) - memmove(&fid->qid, - &rc->params.rwalk.wqids[rc->params.rwalk.nwqid - 1], - sizeof(struct p9_qid)); + memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid)); else fid->qid = oldfid->qid; - kfree(tc); - kfree(rc); return fid; clunk_fid: - kfree(tc); - kfree(rc); - rc = NULL; - tc = p9_create_tclunk(fid->fid); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } - - p9_client_rpc(clnt, tc, &rc); + p9_client_clunk(fid); + fid = NULL; error: - kfree(tc); - kfree(rc); if (fid && (fid != oldfid)) p9_fid_destroy(fid); @@ -432,35 +954,39 @@ EXPORT_SYMBOL(p9_client_walk); int p9_client_open(struct p9_fid *fid, int mode) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; + struct p9_req_t *req; + struct p9_qid qid; + int iounit; - P9_DPRINTK(P9_DEBUG_9P, "fid %d mode %d\n", fid->fid, mode); + P9_DPRINTK(P9_DEBUG_9P, ">>> TOPEN fid %d mode %d\n", fid->fid, mode); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; if (fid->mode != -1) return -EINVAL; - tc = p9_create_topen(fid->fid, mode); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; + req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode); + if (IS_ERR(req)) { + err = 
PTR_ERR(req); + goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto done; + err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n", + qid.type, qid.path, qid.version, iounit); fid->mode = mode; - fid->iounit = rc->params.ropen.iounit; + fid->iounit = iounit; -done: - kfree(tc); - kfree(rc); +free_and_error: + p9_free_req(clnt, req); +error: return err; } EXPORT_SYMBOL(p9_client_open); @@ -469,37 +995,41 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, char *extension) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; + struct p9_req_t *req; + struct p9_qid qid; + int iounit; - P9_DPRINTK(P9_DEBUG_9P, "fid %d name %s perm %d mode %d\n", fid->fid, - name, perm, mode); + P9_DPRINTK(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", + fid->fid, name, perm, mode); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; if (fid->mode != -1) return -EINVAL; - tc = p9_create_tcreate(fid->fid, name, perm, mode, extension, - clnt->dotu); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; + req = p9_client_rpc(clnt, P9_TCREATE, "dsdb?s", fid->fid, name, perm, + mode, extension); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto done; + err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", + qid.type, qid.path, qid.version, iounit); fid->mode = mode; - fid->iounit = rc->params.ropen.iounit; + fid->iounit = iounit; -done: - kfree(tc); - kfree(rc); +free_and_error: + p9_free_req(clnt, req); +error: return err; } EXPORT_SYMBOL(p9_client_fcreate); @@ -507,31 +1037,25 @@ EXPORT_SYMBOL(p9_client_fcreate); int p9_client_clunk(struct p9_fid *fid) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; + struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; - tc = p9_create_tclunk(fid->fid); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; + req = p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto done; + P9_DPRINTK(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); + p9_free_req(clnt, req); p9_fid_destroy(fid); -done: - kfree(tc); - kfree(rc); +error: return err; } EXPORT_SYMBOL(p9_client_clunk); @@ -539,157 +1063,41 @@ EXPORT_SYMBOL(p9_client_clunk); int p9_client_remove(struct p9_fid *fid) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; + struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; - tc = p9_create_tremove(fid->fid); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; + req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto done; + P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); + p9_free_req(clnt, req); p9_fid_destroy(fid); -done: - kfree(tc); - kfree(rc); - return err; -} -EXPORT_SYMBOL(p9_client_remove); - -int p9_client_read(struct 
p9_fid *fid, char *data, u64 offset, u32 count) -{ - int err, n, rsize, total; - struct p9_fcall *tc, *rc; - struct p9_client *clnt; - - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu %d\n", fid->fid, - (long long unsigned) offset, count); - err = 0; - tc = NULL; - rc = NULL; - clnt = fid->clnt; - total = 0; - - rsize = fid->iounit; - if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) - rsize = clnt->msize - P9_IOHDRSZ; - - do { - if (count < rsize) - rsize = count; - - tc = p9_create_tread(fid->fid, offset, rsize); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } - - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; - - n = rc->params.rread.count; - if (n > count) - n = count; - - memmove(data, rc->params.rread.data, n); - count -= n; - data += n; - offset += n; - total += n; - kfree(tc); - tc = NULL; - kfree(rc); - rc = NULL; - } while (count > 0 && n == rsize); - - return total; - error: - kfree(tc); - kfree(rc); return err; } -EXPORT_SYMBOL(p9_client_read); - -int p9_client_write(struct p9_fid *fid, char *data, u64 offset, u32 count) -{ - int err, n, rsize, total; - struct p9_fcall *tc, *rc; - struct p9_client *clnt; - - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, - (long long unsigned) offset, count); - err = 0; - tc = NULL; - rc = NULL; - clnt = fid->clnt; - total = 0; - - rsize = fid->iounit; - if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) - rsize = clnt->msize - P9_IOHDRSZ; - - do { - if (count < rsize) - rsize = count; - - tc = p9_create_twrite(fid->fid, offset, rsize, data); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } - - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; - - n = rc->params.rread.count; - count -= n; - data += n; - offset += n; - total += n; - kfree(tc); - tc = NULL; - kfree(rc); - rc = NULL; - } while (count > 0); - - return total; - -error: - kfree(tc); - kfree(rc); - return err; -} -EXPORT_SYMBOL(p9_client_write); +EXPORT_SYMBOL(p9_client_remove); int -p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset, u32 count) +p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, + u32 count) { - int err, n, rsize, total; - struct p9_fcall *tc, *rc; + int err, rsize, total; struct p9_client *clnt; + struct p9_req_t *req; + char *dataptr; - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, + P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, (long long unsigned) offset, count); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; total = 0; @@ -697,63 +1105,57 @@ p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset, u32 count) if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) rsize = clnt->msize - P9_IOHDRSZ; - do { - if (count < rsize) - rsize = count; + if (count < rsize) + rsize = count; - tc = p9_create_tread(fid->fid, offset, rsize); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } + req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; + err = p9pdu_readf(req->rc, clnt->dotu, "D", &count, &dataptr); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - n = rc->params.rread.count; - if (n > count) - n = count; + if (data) { + memmove(data, dataptr, count); + data += count; + } - err = copy_to_user(data, rc->params.rread.data, n); + if (udata) { + err = 
copy_to_user(udata, dataptr, count); if (err) { err = -EFAULT; - goto error; + goto free_and_error; } + } - count -= n; - data += n; - offset += n; - total += n; - kfree(tc); - tc = NULL; - kfree(rc); - rc = NULL; - } while (count > 0 && n == rsize); - - return total; + p9_free_req(clnt, req); + return count; +free_and_error: + p9_free_req(clnt, req); error: - kfree(tc); - kfree(rc); return err; } -EXPORT_SYMBOL(p9_client_uread); +EXPORT_SYMBOL(p9_client_read); int -p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset, - u32 count) +p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, + u64 offset, u32 count) { - int err, n, rsize, total; - struct p9_fcall *tc, *rc; + int err, rsize, total; struct p9_client *clnt; + struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, - (long long unsigned) offset, count); + P9_DPRINTK(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n", + fid->fid, (long long unsigned) offset, count); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; total = 0; @@ -761,325 +1163,114 @@ p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset, if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) rsize = clnt->msize - P9_IOHDRSZ; - do { - if (count < rsize) - rsize = count; - - tc = p9_create_twrite_u(fid->fid, offset, rsize, data); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } + if (count < rsize) + rsize = count; + if (data) + req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset, + rsize, data); + else + req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset, + rsize, udata); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; + err = p9pdu_readf(req->rc, clnt->dotu, "d", &count); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } - n = rc->params.rread.count; - count -= n; - data += n; - offset += n; - total += n; - kfree(tc); - tc = NULL; - kfree(rc); - rc = NULL; - } while (count > 0); + P9_DPRINTK(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); - return total; + p9_free_req(clnt, req); + return count; +free_and_error: + p9_free_req(clnt, req); error: - kfree(tc); - kfree(rc); return err; } -EXPORT_SYMBOL(p9_client_uwrite); - -int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count) -{ - int n, total; - - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, - (long long unsigned) offset, count); - n = 0; - total = 0; - while (count) { - n = p9_client_read(fid, data, offset, count); - if (n <= 0) - break; - - data += n; - offset += n; - count -= n; - total += n; - } - - if (n < 0) - total = n; - - return total; -} -EXPORT_SYMBOL(p9_client_readn); +EXPORT_SYMBOL(p9_client_write); -struct p9_stat *p9_client_stat(struct p9_fid *fid) +struct p9_wstat *p9_client_stat(struct p9_fid *fid) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; - struct p9_stat *ret; + struct p9_wstat *ret = kmalloc(sizeof(struct p9_wstat), GFP_KERNEL); + struct p9_req_t *req; + u16 ignored; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid); + + if (!ret) + return ERR_PTR(-ENOMEM); - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); err = 0; - tc = NULL; - rc = NULL; - ret = NULL; clnt = fid->clnt; - tc = p9_create_tstat(fid->fid); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; + req = p9_client_rpc(clnt, P9_TSTAT, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } - err = p9_client_rpc(clnt, tc, &rc); - 
if (err) - goto error; - - ret = p9_clone_stat(&rc->params.rstat.stat, clnt->dotu); - if (IS_ERR(ret)) { - err = PTR_ERR(ret); - ret = NULL; - goto error; + err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret); + if (err) { + ret = ERR_PTR(err); + p9pdu_dump(1, req->rc); + goto free_and_error; } - kfree(tc); - kfree(rc); - return ret; - + P9_DPRINTK(P9_DEBUG_9P, + "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n" + "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" + "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" + "<<< uid=%d gid=%d n_muid=%d\n", + ret->size, ret->type, ret->dev, ret->qid.type, + ret->qid.path, ret->qid.version, ret->mode, + ret->atime, ret->mtime, ret->length, ret->name, + ret->uid, ret->gid, ret->muid, ret->extension, + ret->n_uid, ret->n_gid, ret->n_muid); + +free_and_error: + p9_free_req(clnt, req); error: - kfree(tc); - kfree(rc); - kfree(ret); - return ERR_PTR(err); + return ret; } EXPORT_SYMBOL(p9_client_stat); int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) { int err; - struct p9_fcall *tc, *rc; + struct p9_req_t *req; struct p9_client *clnt; - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, + " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" + " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" + " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" + " uid=%d gid=%d n_muid=%d\n", + wst->size, wst->type, wst->dev, wst->qid.type, + wst->qid.path, wst->qid.version, wst->mode, + wst->atime, wst->mtime, wst->length, wst->name, + wst->uid, wst->gid, wst->muid, wst->extension, + wst->n_uid, wst->n_gid, wst->n_muid); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; - tc = p9_create_twstat(fid->fid, wst, clnt->dotu); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; - } - - err = p9_client_rpc(clnt, tc, &rc); - -done: - kfree(tc); - kfree(rc); - return err; -} -EXPORT_SYMBOL(p9_client_wstat); - -struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset) -{ - int err, n, m; - struct p9_fcall *tc, *rc; - struct p9_client *clnt; - struct p9_stat st, *ret; - - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu\n", fid->fid, - (long long unsigned) offset); - err = 0; - tc = NULL; - rc = NULL; - ret = NULL; - clnt = fid->clnt; - - /* if the offset is below or above the current response, free it */ - if (offset < fid->rdir_fpos || (fid->rdir_fcall && - offset >= fid->rdir_fpos+fid->rdir_fcall->params.rread.count)) { - fid->rdir_pos = 0; - if (fid->rdir_fcall) - fid->rdir_fpos += fid->rdir_fcall->params.rread.count; - - kfree(fid->rdir_fcall); - fid->rdir_fcall = NULL; - if (offset < fid->rdir_fpos) - fid->rdir_fpos = 0; - } - - if (!fid->rdir_fcall) { - n = fid->iounit; - if (!n || n > clnt->msize-P9_IOHDRSZ) - n = clnt->msize - P9_IOHDRSZ; - - while (1) { - if (fid->rdir_fcall) { - fid->rdir_fpos += - fid->rdir_fcall->params.rread.count; - kfree(fid->rdir_fcall); - fid->rdir_fcall = NULL; - } - - tc = p9_create_tread(fid->fid, fid->rdir_fpos, n); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } - - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; - - n = rc->params.rread.count; - if (n == 0) - goto done; - - fid->rdir_fcall = rc; - rc = NULL; - if (offset >= fid->rdir_fpos && - offset < fid->rdir_fpos+n) - break; - } - - fid->rdir_pos = 0; - } - - m = offset - fid->rdir_fpos; - if (m < 0) - goto done; - - n = p9_deserialize_stat(fid->rdir_fcall->params.rread.data + m, - fid->rdir_fcall->params.rread.count - m, &st, 
clnt->dotu); - - if (!n) { - err = -EIO; - goto error; - } - - fid->rdir_pos += n; - st.size = n; - ret = p9_clone_stat(&st, clnt->dotu); - if (IS_ERR(ret)) { - err = PTR_ERR(ret); - ret = NULL; - goto error; - } - -done: - kfree(tc); - kfree(rc); - return ret; - -error: - kfree(tc); - kfree(rc); - kfree(ret); - return ERR_PTR(err); -} -EXPORT_SYMBOL(p9_client_dirread); - -static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu) -{ - int n; - char *p; - struct p9_stat *ret; - - n = sizeof(struct p9_stat) + st->name.len + st->uid.len + st->gid.len + - st->muid.len; - - if (dotu) - n += st->extension.len; - - ret = kmalloc(n, GFP_KERNEL); - if (!ret) - return ERR_PTR(-ENOMEM); - - memmove(ret, st, sizeof(struct p9_stat)); - p = ((char *) ret) + sizeof(struct p9_stat); - memmove(p, st->name.str, st->name.len); - ret->name.str = p; - p += st->name.len; - memmove(p, st->uid.str, st->uid.len); - ret->uid.str = p; - p += st->uid.len; - memmove(p, st->gid.str, st->gid.len); - ret->gid.str = p; - p += st->gid.len; - memmove(p, st->muid.str, st->muid.len); - ret->muid.str = p; - p += st->muid.len; - - if (dotu) { - memmove(p, st->extension.str, st->extension.len); - ret->extension.str = p; - p += st->extension.len; - } - - return ret; -} - -static struct p9_fid *p9_fid_create(struct p9_client *clnt) -{ - int err; - struct p9_fid *fid; - - P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); - fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); - if (!fid) - return ERR_PTR(-ENOMEM); - - fid->fid = p9_idpool_get(clnt->fidpool); - if (fid->fid < 0) { - err = -ENOSPC; + req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, 0, wst); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } - memset(&fid->qid, 0, sizeof(struct p9_qid)); - fid->mode = -1; - fid->rdir_fpos = 0; - fid->rdir_pos = 0; - fid->rdir_fcall = NULL; - fid->uid = current->fsuid; - fid->clnt = clnt; - fid->aux = NULL; - - spin_lock(&clnt->lock); - list_add(&fid->flist, &clnt->fidlist); - spin_unlock(&clnt->lock); - - return fid; + P9_DPRINTK(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid); + p9_free_req(clnt, req); error: - kfree(fid); - return ERR_PTR(err); -} - -static void p9_fid_destroy(struct p9_fid *fid) -{ - struct p9_client *clnt; - - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); - clnt = fid->clnt; - p9_idpool_put(fid->fid, clnt->fidpool); - spin_lock(&clnt->lock); - list_del(&fid->flist); - spin_unlock(&clnt->lock); - kfree(fid->rdir_fcall); - kfree(fid); + return err; } +EXPORT_SYMBOL(p9_client_wstat); diff --git a/net/9p/conv.c b/net/9p/conv.c deleted file mode 100644 index 5ad3a3bd73b..00000000000 --- a/net/9p/conv.c +++ /dev/null @@ -1,1054 +0,0 @@ -/* - * net/9p/conv.c - * - * 9P protocol conversion functions - * - * Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net> - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/sched.h> -#include <linux/idr.h> -#include <linux/uaccess.h> -#include <net/9p/9p.h> - -/* - * Buffer to help with string parsing - */ -struct cbuf { - unsigned char *sp; - unsigned char *p; - unsigned char *ep; -}; - -static inline void buf_init(struct cbuf *buf, void *data, int datalen) -{ - buf->sp = buf->p = data; - buf->ep = data + datalen; -} - -static inline int buf_check_overflow(struct cbuf *buf) -{ - return buf->p > buf->ep; -} - -static int buf_check_size(struct cbuf *buf, int len) -{ - if (buf->p + len > buf->ep) { - if (buf->p < buf->ep) { - P9_EPRINTK(KERN_ERR, - "buffer overflow: want %d has %d\n", len, - (int)(buf->ep - buf->p)); - dump_stack(); - buf->p = buf->ep + 1; - } - - return 0; - } - - return 1; -} - -static void *buf_alloc(struct cbuf *buf, int len) -{ - void *ret = NULL; - - if (buf_check_size(buf, len)) { - ret = buf->p; - buf->p += len; - } - - return ret; -} - -static void buf_put_int8(struct cbuf *buf, u8 val) -{ - if (buf_check_size(buf, 1)) { - buf->p[0] = val; - buf->p++; - } -} - -static void buf_put_int16(struct cbuf *buf, u16 val) -{ - if (buf_check_size(buf, 2)) { - *(__le16 *) buf->p = cpu_to_le16(val); - buf->p += 2; - } -} - -static void buf_put_int32(struct cbuf *buf, u32 val) -{ - if (buf_check_size(buf, 4)) { - *(__le32 *)buf->p = cpu_to_le32(val); - buf->p += 4; - } -} - -static void buf_put_int64(struct cbuf *buf, u64 val) -{ - if (buf_check_size(buf, 8)) { - *(__le64 *)buf->p = cpu_to_le64(val); - buf->p += 8; - } -} - -static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen) -{ - char *ret; - - ret = NULL; - if (buf_check_size(buf, slen + 2)) { - buf_put_int16(buf, slen); - ret = buf->p; - memcpy(buf->p, s, slen); - buf->p += slen; - } - - return ret; -} - -static u8 buf_get_int8(struct cbuf *buf) -{ - u8 ret = 0; - - if (buf_check_size(buf, 1)) { - ret = buf->p[0]; - buf->p++; - } - - return ret; -} - -static u16 buf_get_int16(struct cbuf *buf) -{ - u16 ret = 0; - - if (buf_check_size(buf, 2)) { - ret = le16_to_cpu(*(__le16 *)buf->p); - buf->p += 2; - } - - return ret; -} - -static u32 buf_get_int32(struct cbuf *buf) -{ - u32 ret = 0; - - if (buf_check_size(buf, 4)) { - ret = le32_to_cpu(*(__le32 *)buf->p); - buf->p += 4; - } - - return ret; -} - -static u64 buf_get_int64(struct cbuf *buf) -{ - u64 ret = 0; - - if (buf_check_size(buf, 8)) { - ret = le64_to_cpu(*(__le64 *)buf->p); - buf->p += 8; - } - - return ret; -} - -static void buf_get_str(struct cbuf *buf, struct p9_str *vstr) -{ - vstr->len = buf_get_int16(buf); - if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) { - vstr->str = buf->p; - buf->p += vstr->len; - } else { - vstr->len = 0; - vstr->str = NULL; - } -} - -static void buf_get_qid(struct cbuf *bufp, struct p9_qid *qid) -{ - qid->type = buf_get_int8(bufp); - qid->version = buf_get_int32(bufp); - qid->path = buf_get_int64(bufp); -} - -/** - * p9_size_wstat - calculate the size of a variable length stat struct - * @wstat: metadata (stat) structure - * @dotu: non-zero if 9P2000.u - * - */ - -static int p9_size_wstat(struct p9_wstat *wstat, int dotu) -{ - int size = 0; - - if (wstat == NULL) { - P9_EPRINTK(KERN_ERR, "p9_size_stat: got a NULL stat pointer\n"); - return 0; - } - - size = /* 
2 + *//* size[2] */ - 2 + /* type[2] */ - 4 + /* dev[4] */ - 1 + /* qid.type[1] */ - 4 + /* qid.vers[4] */ - 8 + /* qid.path[8] */ - 4 + /* mode[4] */ - 4 + /* atime[4] */ - 4 + /* mtime[4] */ - 8 + /* length[8] */ - 8; /* minimum sum of string lengths */ - - if (wstat->name) - size += strlen(wstat->name); - if (wstat->uid) - size += strlen(wstat->uid); - if (wstat->gid) - size += strlen(wstat->gid); - if (wstat->muid) - size += strlen(wstat->muid); - - if (dotu) { - size += 4 + /* n_uid[4] */ - 4 + /* n_gid[4] */ - 4 + /* n_muid[4] */ - 2; /* string length of extension[4] */ - if (wstat->extension) - size += strlen(wstat->extension); - } - - return size; -} - -/** - * buf_get_stat - safely decode a recieved metadata (stat) structure - * @bufp: buffer to deserialize - * @stat: metadata (stat) structure - * @dotu: non-zero if 9P2000.u - * - */ - -static void -buf_get_stat(struct cbuf *bufp, struct p9_stat *stat, int dotu) -{ - stat->size = buf_get_int16(bufp); - stat->type = buf_get_int16(bufp); - stat->dev = buf_get_int32(bufp); - stat->qid.type = buf_get_int8(bufp); - stat->qid.version = buf_get_int32(bufp); - stat->qid.path = buf_get_int64(bufp); - stat->mode = buf_get_int32(bufp); - stat->atime = buf_get_int32(bufp); - stat->mtime = buf_get_int32(bufp); - stat->length = buf_get_int64(bufp); - buf_get_str(bufp, &stat->name); - buf_get_str(bufp, &stat->uid); - buf_get_str(bufp, &stat->gid); - buf_get_str(bufp, &stat->muid); - - if (dotu) { - buf_get_str(bufp, &stat->extension); - stat->n_uid = buf_get_int32(bufp); - stat->n_gid = buf_get_int32(bufp); - stat->n_muid = buf_get_int32(bufp); - } -} - -/** - * p9_deserialize_stat - decode a received metadata structure - * @buf: buffer to deserialize - * @buflen: length of received buffer - * @stat: metadata structure to decode into - * @dotu: non-zero if 9P2000.u - * - * Note: stat will point to the buf region. 
- */ - -int -p9_deserialize_stat(void *buf, u32 buflen, struct p9_stat *stat, - int dotu) -{ - struct cbuf buffer; - struct cbuf *bufp = &buffer; - unsigned char *p; - - buf_init(bufp, buf, buflen); - p = bufp->p; - buf_get_stat(bufp, stat, dotu); - - if (buf_check_overflow(bufp)) - return 0; - else - return bufp->p - p; -} -EXPORT_SYMBOL(p9_deserialize_stat); - -/** - * deserialize_fcall - unmarshal a response - * @buf: recieved buffer - * @buflen: length of received buffer - * @rcall: fcall structure to populate - * @rcalllen: length of fcall structure to populate - * @dotu: non-zero if 9P2000.u - * - */ - -int -p9_deserialize_fcall(void *buf, u32 buflen, struct p9_fcall *rcall, - int dotu) -{ - - struct cbuf buffer; - struct cbuf *bufp = &buffer; - int i = 0; - - buf_init(bufp, buf, buflen); - - rcall->size = buf_get_int32(bufp); - rcall->id = buf_get_int8(bufp); - rcall->tag = buf_get_int16(bufp); - - P9_DPRINTK(P9_DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, - rcall->id, rcall->tag); - - switch (rcall->id) { - default: - P9_EPRINTK(KERN_ERR, "unknown message type: %d\n", rcall->id); - return -EPROTO; - case P9_RVERSION: - rcall->params.rversion.msize = buf_get_int32(bufp); - buf_get_str(bufp, &rcall->params.rversion.version); - break; - case P9_RFLUSH: - break; - case P9_RATTACH: - rcall->params.rattach.qid.type = buf_get_int8(bufp); - rcall->params.rattach.qid.version = buf_get_int32(bufp); - rcall->params.rattach.qid.path = buf_get_int64(bufp); - break; - case P9_RWALK: - rcall->params.rwalk.nwqid = buf_get_int16(bufp); - if (rcall->params.rwalk.nwqid > P9_MAXWELEM) { - P9_EPRINTK(KERN_ERR, - "Rwalk with more than %d qids: %d\n", - P9_MAXWELEM, rcall->params.rwalk.nwqid); - return -EPROTO; - } - - for (i = 0; i < rcall->params.rwalk.nwqid; i++) - buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]); - break; - case P9_ROPEN: - buf_get_qid(bufp, &rcall->params.ropen.qid); - rcall->params.ropen.iounit = buf_get_int32(bufp); - break; - case P9_RCREATE: - buf_get_qid(bufp, &rcall->params.rcreate.qid); - rcall->params.rcreate.iounit = buf_get_int32(bufp); - break; - case P9_RREAD: - rcall->params.rread.count = buf_get_int32(bufp); - rcall->params.rread.data = bufp->p; - buf_check_size(bufp, rcall->params.rread.count); - break; - case P9_RWRITE: - rcall->params.rwrite.count = buf_get_int32(bufp); - break; - case P9_RCLUNK: - break; - case P9_RREMOVE: - break; - case P9_RSTAT: - buf_get_int16(bufp); - buf_get_stat(bufp, &rcall->params.rstat.stat, dotu); - break; - case P9_RWSTAT: - break; - case P9_RERROR: - buf_get_str(bufp, &rcall->params.rerror.error); - if (dotu) - rcall->params.rerror.errno = buf_get_int16(bufp); - break; - } - - if (buf_check_overflow(bufp)) { - P9_DPRINTK(P9_DEBUG_ERROR, "buffer overflow\n"); - return -EIO; - } - - return bufp->p - bufp->sp; -} -EXPORT_SYMBOL(p9_deserialize_fcall); - -static inline void p9_put_int8(struct cbuf *bufp, u8 val, u8 * p) -{ - *p = val; - buf_put_int8(bufp, val); -} - -static inline void p9_put_int16(struct cbuf *bufp, u16 val, u16 * p) -{ - *p = val; - buf_put_int16(bufp, val); -} - -static inline void p9_put_int32(struct cbuf *bufp, u32 val, u32 * p) -{ - *p = val; - buf_put_int32(bufp, val); -} - -static inline void p9_put_int64(struct cbuf *bufp, u64 val, u64 * p) -{ - *p = val; - buf_put_int64(bufp, val); -} - -static void -p9_put_str(struct cbuf *bufp, char *data, struct p9_str *str) -{ - int len; - char *s; - - if (data) - len = strlen(data); - else - len = 0; - - s = buf_put_stringn(bufp, data, len); - if (str) { - str->len = 
len; - str->str = s; - } -} - -static int -p9_put_data(struct cbuf *bufp, const char *data, int count, - unsigned char **pdata) -{ - *pdata = buf_alloc(bufp, count); - if (*pdata == NULL) - return -ENOMEM; - memmove(*pdata, data, count); - return 0; -} - -static int -p9_put_user_data(struct cbuf *bufp, const char __user *data, int count, - unsigned char **pdata) -{ - *pdata = buf_alloc(bufp, count); - if (*pdata == NULL) - return -ENOMEM; - return copy_from_user(*pdata, data, count); -} - -static void -p9_put_wstat(struct cbuf *bufp, struct p9_wstat *wstat, - struct p9_stat *stat, int statsz, int dotu) -{ - p9_put_int16(bufp, statsz, &stat->size); - p9_put_int16(bufp, wstat->type, &stat->type); - p9_put_int32(bufp, wstat->dev, &stat->dev); - p9_put_int8(bufp, wstat->qid.type, &stat->qid.type); - p9_put_int32(bufp, wstat->qid.version, &stat->qid.version); - p9_put_int64(bufp, wstat->qid.path, &stat->qid.path); - p9_put_int32(bufp, wstat->mode, &stat->mode); - p9_put_int32(bufp, wstat->atime, &stat->atime); - p9_put_int32(bufp, wstat->mtime, &stat->mtime); - p9_put_int64(bufp, wstat->length, &stat->length); - - p9_put_str(bufp, wstat->name, &stat->name); - p9_put_str(bufp, wstat->uid, &stat->uid); - p9_put_str(bufp, wstat->gid, &stat->gid); - p9_put_str(bufp, wstat->muid, &stat->muid); - - if (dotu) { - p9_put_str(bufp, wstat->extension, &stat->extension); - p9_put_int32(bufp, wstat->n_uid, &stat->n_uid); - p9_put_int32(bufp, wstat->n_gid, &stat->n_gid); - p9_put_int32(bufp, wstat->n_muid, &stat->n_muid); - } -} - -static struct p9_fcall * -p9_create_common(struct cbuf *bufp, u32 size, u8 id) -{ - struct p9_fcall *fc; - - size += 4 + 1 + 2; /* size[4] id[1] tag[2] */ - fc = kmalloc(sizeof(struct p9_fcall) + size, GFP_KERNEL); - if (!fc) - return ERR_PTR(-ENOMEM); - - fc->sdata = (char *)fc + sizeof(*fc); - - buf_init(bufp, (char *)fc->sdata, size); - p9_put_int32(bufp, size, &fc->size); - p9_put_int8(bufp, id, &fc->id); - p9_put_int16(bufp, P9_NOTAG, &fc->tag); - - return fc; -} - -/** - * p9_set_tag - set the tag field of an &p9_fcall structure - * @fc: fcall structure to set tag within - * @tag: tag id to set - */ - -void p9_set_tag(struct p9_fcall *fc, u16 tag) -{ - fc->tag = tag; - *(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag); -} -EXPORT_SYMBOL(p9_set_tag); - -/** - * p9_create_tversion - allocates and creates a T_VERSION request - * @msize: requested maximum data size - * @version: version string to negotiate - * - */ -struct p9_fcall *p9_create_tversion(u32 msize, char *version) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 2 + strlen(version); /* msize[4] version[s] */ - fc = p9_create_common(bufp, size, P9_TVERSION); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, msize, &fc->params.tversion.msize); - p9_put_str(bufp, version, &fc->params.tversion.version); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tversion); - -/** - * p9_create_tauth - allocates and creates a T_AUTH request - * @afid: handle to use for authentication protocol - * @uname: user name attempting to authenticate - * @aname: mount specifier for remote server - * @n_uname: numeric id for user attempting to authneticate - * @dotu: 9P2000.u extension flag - * - */ - -struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname, - u32 n_uname, int dotu) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* afid[4] uname[s] 
aname[s] */ - size = 4 + 2 + 2; - if (uname) - size += strlen(uname); - - if (aname) - size += strlen(aname); - - if (dotu) - size += 4; /* n_uname */ - - fc = p9_create_common(bufp, size, P9_TAUTH); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, afid, &fc->params.tauth.afid); - p9_put_str(bufp, uname, &fc->params.tauth.uname); - p9_put_str(bufp, aname, &fc->params.tauth.aname); - if (dotu) - p9_put_int32(bufp, n_uname, &fc->params.tauth.n_uname); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tauth); - -/** - * p9_create_tattach - allocates and creates a T_ATTACH request - * @fid: handle to use for the new mount point - * @afid: handle to use for authentication protocol - * @uname: user name attempting to attach - * @aname: mount specifier for remote server - * @n_uname: numeric id for user attempting to attach - * @n_uname: numeric id for user attempting to attach - * @dotu: 9P2000.u extension flag - * - */ - -struct p9_fcall * -p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname, - u32 n_uname, int dotu) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* fid[4] afid[4] uname[s] aname[s] */ - size = 4 + 4 + 2 + 2; - if (uname) - size += strlen(uname); - - if (aname) - size += strlen(aname); - - if (dotu) - size += 4; /* n_uname */ - - fc = p9_create_common(bufp, size, P9_TATTACH); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tattach.fid); - p9_put_int32(bufp, afid, &fc->params.tattach.afid); - p9_put_str(bufp, uname, &fc->params.tattach.uname); - p9_put_str(bufp, aname, &fc->params.tattach.aname); - if (dotu) - p9_put_int32(bufp, n_uname, &fc->params.tattach.n_uname); - -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tattach); - -/** - * p9_create_tflush - allocates and creates a T_FLUSH request - * @oldtag: tag id for the transaction we are attempting to cancel - * - */ - -struct p9_fcall *p9_create_tflush(u16 oldtag) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 2; /* oldtag[2] */ - fc = p9_create_common(bufp, size, P9_TFLUSH); - if (IS_ERR(fc)) - goto error; - - p9_put_int16(bufp, oldtag, &fc->params.tflush.oldtag); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tflush); - -/** - * p9_create_twalk - allocates and creates a T_FLUSH request - * @fid: handle we are traversing from - * @newfid: a new handle for this transaction - * @nwname: number of path elements to traverse - * @wnames: array of path elements - * - */ - -struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname, - char **wnames) -{ - int i, size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - if (nwname > P9_MAXWELEM) { - P9_DPRINTK(P9_DEBUG_ERROR, "nwname > %d\n", P9_MAXWELEM); - return NULL; - } - - size = 4 + 4 + 2; /* fid[4] newfid[4] nwname[2] ... 
*/ - for (i = 0; i < nwname; i++) { - size += 2 + strlen(wnames[i]); /* wname[s] */ - } - - fc = p9_create_common(bufp, size, P9_TWALK); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.twalk.fid); - p9_put_int32(bufp, newfid, &fc->params.twalk.newfid); - p9_put_int16(bufp, nwname, &fc->params.twalk.nwname); - for (i = 0; i < nwname; i++) { - p9_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]); - } - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_twalk); - -/** - * p9_create_topen - allocates and creates a T_OPEN request - * @fid: handle we are trying to open - * @mode: what mode we are trying to open the file in - * - */ - -struct p9_fcall *p9_create_topen(u32 fid, u8 mode) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 1; /* fid[4] mode[1] */ - fc = p9_create_common(bufp, size, P9_TOPEN); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.topen.fid); - p9_put_int8(bufp, mode, &fc->params.topen.mode); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_topen); - -/** - * p9_create_tcreate - allocates and creates a T_CREATE request - * @fid: handle of directory we are trying to create in - * @name: name of the file we are trying to create - * @perm: permissions for the file we are trying to create - * @mode: what mode we are trying to open the file in - * @extension: 9p2000.u extension string (for special files) - * @dotu: 9p2000.u enabled flag - * - * Note: Plan 9 create semantics include opening the resulting file - * which is why mode is included. - */ - -struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, - char *extension, int dotu) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* fid[4] name[s] perm[4] mode[1] */ - size = 4 + 2 + strlen(name) + 4 + 1; - if (dotu) { - size += 2 + /* extension[s] */ - (extension == NULL ? 
0 : strlen(extension)); - } - - fc = p9_create_common(bufp, size, P9_TCREATE); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tcreate.fid); - p9_put_str(bufp, name, &fc->params.tcreate.name); - p9_put_int32(bufp, perm, &fc->params.tcreate.perm); - p9_put_int8(bufp, mode, &fc->params.tcreate.mode); - if (dotu) - p9_put_str(bufp, extension, &fc->params.tcreate.extension); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tcreate); - -/** - * p9_create_tread - allocates and creates a T_READ request - * @fid: handle of the file we are trying to read - * @offset: offset to start reading from - * @count: how many bytes to read - */ - -struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 8 + 4; /* fid[4] offset[8] count[4] */ - fc = p9_create_common(bufp, size, P9_TREAD); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tread.fid); - p9_put_int64(bufp, offset, &fc->params.tread.offset); - p9_put_int32(bufp, count, &fc->params.tread.count); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tread); - -/** - * p9_create_twrite - allocates and creates a T_WRITE request from the kernel - * @fid: handle of the file we are trying to write - * @offset: offset to start writing at - * @count: how many bytes to write - * @data: data to write - * - * This function will create a requst with data buffers from the kernel - * such as the page cache. - */ - -struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count, - const char *data) -{ - int size, err; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* fid[4] offset[8] count[4] data[count] */ - size = 4 + 8 + 4 + count; - fc = p9_create_common(bufp, size, P9_TWRITE); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.twrite.fid); - p9_put_int64(bufp, offset, &fc->params.twrite.offset); - p9_put_int32(bufp, count, &fc->params.twrite.count); - err = p9_put_data(bufp, data, count, &fc->params.twrite.data); - if (err) { - kfree(fc); - fc = ERR_PTR(err); - goto error; - } - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_twrite); - -/** - * p9_create_twrite_u - allocates and creates a T_WRITE request from userspace - * @fid: handle of the file we are trying to write - * @offset: offset to start writing at - * @count: how many bytes to write - * @data: data to write - * - * This function will create a request with data buffers from userspace - */ - -struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count, - const char __user *data) -{ - int size, err; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* fid[4] offset[8] count[4] data[count] */ - size = 4 + 8 + 4 + count; - fc = p9_create_common(bufp, size, P9_TWRITE); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.twrite.fid); - p9_put_int64(bufp, offset, &fc->params.twrite.offset); - p9_put_int32(bufp, count, &fc->params.twrite.count); - err = p9_put_user_data(bufp, data, count, &fc->params.twrite.data); - if (err) { - kfree(fc); - fc = ERR_PTR(err); - goto error; - } - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} 
-EXPORT_SYMBOL(p9_create_twrite_u); - -/** - * p9_create_tclunk - allocate a request to forget about a file handle - * @fid: handle of the file we closing or forgetting about - * - * clunk is used both to close open files and to discard transient handles - * which may be created during meta-data operations and hierarchy traversal. - */ - -struct p9_fcall *p9_create_tclunk(u32 fid) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = p9_create_common(bufp, size, P9_TCLUNK); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tclunk.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tclunk); - -/** - * p9_create_tremove - allocate and create a request to remove a file - * @fid: handle of the file or directory we are removing - * - */ - -struct p9_fcall *p9_create_tremove(u32 fid) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = p9_create_common(bufp, size, P9_TREMOVE); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tremove.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tremove); - -/** - * p9_create_tstat - allocate and populate a request for attributes - * @fid: handle of the file or directory we are trying to get the attributes of - * - */ - -struct p9_fcall *p9_create_tstat(u32 fid) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = p9_create_common(bufp, size, P9_TSTAT); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tstat.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tstat); - -/** - * p9_create_tstat - allocate and populate a request to change attributes - * @fid: handle of the file or directory we are trying to change - * @wstat: &p9_stat structure with attributes we wish to set - * @dotu: 9p2000.u enabled flag - * - */ - -struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat, - int dotu) -{ - int size, statsz; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - statsz = p9_size_wstat(wstat, dotu); - size = 4 + 2 + 2 + statsz; /* fid[4] stat[n] */ - fc = p9_create_common(bufp, size, P9_TWSTAT); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.twstat.fid); - buf_put_int16(bufp, statsz + 2); - p9_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, dotu); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_twstat); - diff --git a/net/9p/fcprint.c b/net/9p/fcprint.c deleted file mode 100644 index 53dd8e28dd8..00000000000 --- a/net/9p/fcprint.c +++ /dev/null @@ -1,366 +0,0 @@ -/* - * net/9p/fcprint.c - * - * Print 9P call. - * - * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/idr.h> -#include <net/9p/9p.h> - -#ifdef CONFIG_NET_9P_DEBUG - -static int -p9_printqid(char *buf, int buflen, struct p9_qid *q) -{ - int n; - char b[10]; - - n = 0; - if (q->type & P9_QTDIR) - b[n++] = 'd'; - if (q->type & P9_QTAPPEND) - b[n++] = 'a'; - if (q->type & P9_QTAUTH) - b[n++] = 'A'; - if (q->type & P9_QTEXCL) - b[n++] = 'l'; - if (q->type & P9_QTTMP) - b[n++] = 't'; - if (q->type & P9_QTSYMLINK) - b[n++] = 'L'; - b[n] = '\0'; - - return scnprintf(buf, buflen, "(%.16llx %x %s)", - (long long int) q->path, q->version, b); -} - -static int -p9_printperm(char *buf, int buflen, int perm) -{ - int n; - char b[15]; - - n = 0; - if (perm & P9_DMDIR) - b[n++] = 'd'; - if (perm & P9_DMAPPEND) - b[n++] = 'a'; - if (perm & P9_DMAUTH) - b[n++] = 'A'; - if (perm & P9_DMEXCL) - b[n++] = 'l'; - if (perm & P9_DMTMP) - b[n++] = 't'; - if (perm & P9_DMDEVICE) - b[n++] = 'D'; - if (perm & P9_DMSOCKET) - b[n++] = 'S'; - if (perm & P9_DMNAMEDPIPE) - b[n++] = 'P'; - if (perm & P9_DMSYMLINK) - b[n++] = 'L'; - b[n] = '\0'; - - return scnprintf(buf, buflen, "%s%03o", b, perm&077); -} - -static int -p9_printstat(char *buf, int buflen, struct p9_stat *st, int extended) -{ - int n; - - n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len, - st->name.str, st->uid.len, st->uid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid); - - n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid); - - n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid); - - n += scnprintf(buf+n, buflen-n, " q "); - n += p9_printqid(buf+n, buflen-n, &st->qid); - n += scnprintf(buf+n, buflen-n, " m "); - n += p9_printperm(buf+n, buflen-n, st->mode); - n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld", - st->atime, st->mtime, (long long int) st->length); - - if (extended) - n += scnprintf(buf+n, buflen-n, " ext '%.*s'", - st->extension.len, st->extension.str); - - return n; -} - -static int -p9_dumpdata(char *buf, int buflen, u8 *data, int datalen) -{ - int i, n; - - i = n = 0; - while (i < datalen) { - n += scnprintf(buf + n, buflen - n, "%02x", data[i]); - if (i%4 == 3) - n += scnprintf(buf + n, buflen - n, " "); - if (i%32 == 31) - n += scnprintf(buf + n, buflen - n, "\n"); - - i++; - } - n += scnprintf(buf + n, buflen - n, "\n"); - - return n; -} - -static int -p9_printdata(char *buf, int buflen, u8 *data, int datalen) -{ - return p9_dumpdata(buf, buflen, data, datalen < 16?datalen:16); -} - -/** - * p9_printfcall - decode and print a protocol structure into a buffer - * @buf: buffer to deposit decoded structure into - * @buflen: available space in buffer - * @fc: protocol rpc structure of type &p9_fcall - * @extended: whether or not session is operating with extended protocol - */ - -int -p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) -{ - int i, ret, type, tag; - - if (!fc) - return scnprintf(buf, buflen, "<NULL>"); - - type = fc->id; - tag = fc->tag; - - ret = 0; - switch (type) { - case P9_TVERSION: - ret += scnprintf(buf+ret, 
buflen-ret, - "Tversion tag %u msize %u version '%.*s'", tag, - fc->params.tversion.msize, - fc->params.tversion.version.len, - fc->params.tversion.version.str); - break; - - case P9_RVERSION: - ret += scnprintf(buf+ret, buflen-ret, - "Rversion tag %u msize %u version '%.*s'", tag, - fc->params.rversion.msize, - fc->params.rversion.version.len, - fc->params.rversion.version.str); - break; - - case P9_TAUTH: - ret += scnprintf(buf+ret, buflen-ret, - "Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag, - fc->params.tauth.afid, fc->params.tauth.uname.len, - fc->params.tauth.uname.str, fc->params.tauth.aname.len, - fc->params.tauth.aname.str); - break; - - case P9_RAUTH: - ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag); - p9_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid); - break; - - case P9_TATTACH: - ret += scnprintf(buf+ret, buflen-ret, - "Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'", tag, - fc->params.tattach.fid, fc->params.tattach.afid, - fc->params.tattach.uname.len, fc->params.tattach.uname.str, - fc->params.tattach.aname.len, fc->params.tattach.aname.str); - break; - - case P9_RATTACH: - ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ", - tag); - p9_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid); - break; - - case P9_RERROR: - ret += scnprintf(buf+ret, buflen-ret, - "Rerror tag %u ename '%.*s'", tag, - fc->params.rerror.error.len, - fc->params.rerror.error.str); - if (extended) - ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n", - fc->params.rerror.errno); - break; - - case P9_TFLUSH: - ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u", - tag, fc->params.tflush.oldtag); - break; - - case P9_RFLUSH: - ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag); - break; - - case P9_TWALK: - ret += scnprintf(buf+ret, buflen-ret, - "Twalk tag %u fid %d newfid %d nwname %d", tag, - fc->params.twalk.fid, fc->params.twalk.newfid, - fc->params.twalk.nwname); - for (i = 0; i < fc->params.twalk.nwname; i++) - ret += scnprintf(buf+ret, buflen-ret, " '%.*s'", - fc->params.twalk.wnames[i].len, - fc->params.twalk.wnames[i].str); - break; - - case P9_RWALK: - ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d", - tag, fc->params.rwalk.nwqid); - for (i = 0; i < fc->params.rwalk.nwqid; i++) - ret += p9_printqid(buf+ret, buflen-ret, - &fc->params.rwalk.wqids[i]); - break; - - case P9_TOPEN: - ret += scnprintf(buf+ret, buflen-ret, - "Topen tag %u fid %d mode %d", tag, - fc->params.topen.fid, fc->params.topen.mode); - break; - - case P9_ROPEN: - ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag); - ret += p9_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid); - ret += scnprintf(buf+ret, buflen-ret, " iounit %d", - fc->params.ropen.iounit); - break; - - case P9_TCREATE: - ret += scnprintf(buf+ret, buflen-ret, - "Tcreate tag %u fid %d name '%.*s' perm ", tag, - fc->params.tcreate.fid, fc->params.tcreate.name.len, - fc->params.tcreate.name.str); - - ret += p9_printperm(buf+ret, buflen-ret, - fc->params.tcreate.perm); - ret += scnprintf(buf+ret, buflen-ret, " mode %d", - fc->params.tcreate.mode); - break; - - case P9_RCREATE: - ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag); - ret += p9_printqid(buf+ret, buflen-ret, - &fc->params.rcreate.qid); - ret += scnprintf(buf+ret, buflen-ret, " iounit %d", - fc->params.rcreate.iounit); - break; - - case P9_TREAD: - ret += scnprintf(buf+ret, buflen-ret, - "Tread tag %u fid %d offset %lld count %u", tag, - fc->params.tread.fid, - (long long int) 
fc->params.tread.offset, - fc->params.tread.count); - break; - - case P9_RREAD: - ret += scnprintf(buf+ret, buflen-ret, - "Rread tag %u count %u data ", tag, - fc->params.rread.count); - ret += p9_printdata(buf+ret, buflen-ret, fc->params.rread.data, - fc->params.rread.count); - break; - - case P9_TWRITE: - ret += scnprintf(buf+ret, buflen-ret, - "Twrite tag %u fid %d offset %lld count %u data ", - tag, fc->params.twrite.fid, - (long long int) fc->params.twrite.offset, - fc->params.twrite.count); - ret += p9_printdata(buf+ret, buflen-ret, fc->params.twrite.data, - fc->params.twrite.count); - break; - - case P9_RWRITE: - ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u", - tag, fc->params.rwrite.count); - break; - - case P9_TCLUNK: - ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d", - tag, fc->params.tclunk.fid); - break; - - case P9_RCLUNK: - ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag); - break; - - case P9_TREMOVE: - ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d", - tag, fc->params.tremove.fid); - break; - - case P9_RREMOVE: - ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag); - break; - - case P9_TSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d", - tag, fc->params.tstat.fid); - break; - - case P9_RSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag); - ret += p9_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat, - extended); - break; - - case P9_TWSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ", - tag, fc->params.twstat.fid); - ret += p9_printstat(buf+ret, buflen-ret, - &fc->params.twstat.stat, extended); - break; - - case P9_RWSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag); - break; - - default: - ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type); - break; - } - - return ret; -} -#else -int -p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) -{ - return 0; -} -#endif /* CONFIG_NET_9P_DEBUG */ -EXPORT_SYMBOL(p9_printfcall); - diff --git a/net/9p/mod.c b/net/9p/mod.c index 1084feb24cb..cf8a4128cd5 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -29,6 +29,7 @@ #include <net/9p/9p.h> #include <linux/fs.h> #include <linux/parser.h> +#include <net/9p/client.h> #include <net/9p/transport.h> #include <linux/list.h> #include <linux/spinlock.h> diff --git a/net/9p/protocol.c b/net/9p/protocol.c new file mode 100644 index 00000000000..29be5243908 --- /dev/null +++ b/net/9p/protocol.c @@ -0,0 +1,558 @@ +/* + * net/9p/protocol.c + * + * 9P Protocol Support Code + * + * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> + * + * Base on code from Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2008 by IBM, Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/uaccess.h> +#include <linux/sched.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include "protocol.h" + +#ifndef MIN +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif + +#ifndef MAX +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +#ifndef offset_of +#define offset_of(type, memb) \ + ((unsigned long)(&((type *)0)->memb)) +#endif +#ifndef container_of +#define container_of(obj, type, memb) \ + ((type *)(((char *)obj) - offset_of(type, memb))) +#endif + +static int +p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...); + +void +p9pdu_dump(int way, struct p9_fcall *pdu) +{ + int i, n; + u8 *data = pdu->sdata; + int datalen = pdu->size; + char buf[255]; + int buflen = 255; + + i = n = 0; + if (datalen > (buflen-16)) + datalen = buflen-16; + while (i < datalen) { + n += scnprintf(buf + n, buflen - n, "%02x ", data[i]); + if (i%4 == 3) + n += scnprintf(buf + n, buflen - n, " "); + if (i%32 == 31) + n += scnprintf(buf + n, buflen - n, "\n"); + + i++; + } + n += scnprintf(buf + n, buflen - n, "\n"); + + if (way) + P9_DPRINTK(P9_DEBUG_PKT, "[[[(%d) %s\n", datalen, buf); + else + P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf); +} +EXPORT_SYMBOL(p9pdu_dump); + +void p9stat_free(struct p9_wstat *stbuf) +{ + kfree(stbuf->name); + kfree(stbuf->uid); + kfree(stbuf->gid); + kfree(stbuf->muid); + kfree(stbuf->extension); +} +EXPORT_SYMBOL(p9stat_free); + +static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) +{ + size_t len = MIN(pdu->size - pdu->offset, size); + memcpy(data, &pdu->sdata[pdu->offset], len); + pdu->offset += len; + return size - len; +} + +static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size) +{ + size_t len = MIN(pdu->capacity - pdu->size, size); + memcpy(&pdu->sdata[pdu->size], data, len); + pdu->size += len; + return size - len; +} + +static size_t +pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) +{ + size_t len = MIN(pdu->capacity - pdu->size, size); + int err = copy_from_user(&pdu->sdata[pdu->size], udata, len); + if (err) + printk(KERN_WARNING "pdu_write_u returning: %d\n", err); + + pdu->size += len; + return size - len; +} + +/* + b - int8_t + w - int16_t + d - int32_t + q - int64_t + s - string + S - stat + Q - qid + D - data blob (int32_t size followed by void *, results are not freed) + T - array of strings (int16_t count, followed by strings) + R - array of qids (int16_t count, followed by qids) + ? 
- if optional = 1, continue parsing +*/ + +static int +p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +{ + const char *ptr; + int errcode = 0; + + for (ptr = fmt; *ptr; ptr++) { + switch (*ptr) { + case 'b':{ + int8_t *val = va_arg(ap, int8_t *); + if (pdu_read(pdu, val, sizeof(*val))) { + errcode = -EFAULT; + break; + } + } + break; + case 'w':{ + int16_t *val = va_arg(ap, int16_t *); + if (pdu_read(pdu, val, sizeof(*val))) { + errcode = -EFAULT; + break; + } + *val = cpu_to_le16(*val); + } + break; + case 'd':{ + int32_t *val = va_arg(ap, int32_t *); + if (pdu_read(pdu, val, sizeof(*val))) { + errcode = -EFAULT; + break; + } + *val = cpu_to_le32(*val); + } + break; + case 'q':{ + int64_t *val = va_arg(ap, int64_t *); + if (pdu_read(pdu, val, sizeof(*val))) { + errcode = -EFAULT; + break; + } + *val = cpu_to_le64(*val); + } + break; + case 's':{ + char **ptr = va_arg(ap, char **); + int16_t len; + int size; + + errcode = p9pdu_readf(pdu, optional, "w", &len); + if (errcode) + break; + + size = MAX(len, 0); + + *ptr = kmalloc(size + 1, GFP_KERNEL); + if (*ptr == NULL) { + errcode = -EFAULT; + break; + } + if (pdu_read(pdu, *ptr, size)) { + errcode = -EFAULT; + kfree(*ptr); + *ptr = NULL; + } else + (*ptr)[size] = 0; + } + break; + case 'Q':{ + struct p9_qid *qid = + va_arg(ap, struct p9_qid *); + + errcode = p9pdu_readf(pdu, optional, "bdq", + &qid->type, &qid->version, + &qid->path); + } + break; + case 'S':{ + struct p9_wstat *stbuf = + va_arg(ap, struct p9_wstat *); + + memset(stbuf, 0, sizeof(struct p9_wstat)); + stbuf->n_uid = stbuf->n_gid = stbuf->n_muid = + -1; + errcode = + p9pdu_readf(pdu, optional, + "wwdQdddqssss?sddd", + &stbuf->size, &stbuf->type, + &stbuf->dev, &stbuf->qid, + &stbuf->mode, &stbuf->atime, + &stbuf->mtime, &stbuf->length, + &stbuf->name, &stbuf->uid, + &stbuf->gid, &stbuf->muid, + &stbuf->extension, + &stbuf->n_uid, &stbuf->n_gid, + &stbuf->n_muid); + if (errcode) + p9stat_free(stbuf); + } + break; + case 'D':{ + int32_t *count = va_arg(ap, int32_t *); + void **data = va_arg(ap, void **); + + errcode = + p9pdu_readf(pdu, optional, "d", count); + if (!errcode) { + *count = + MIN(*count, + pdu->size - pdu->offset); + *data = &pdu->sdata[pdu->offset]; + } + } + break; + case 'T':{ + int16_t *nwname = va_arg(ap, int16_t *); + char ***wnames = va_arg(ap, char ***); + + errcode = + p9pdu_readf(pdu, optional, "w", nwname); + if (!errcode) { + *wnames = + kmalloc(sizeof(char *) * *nwname, + GFP_KERNEL); + if (!*wnames) + errcode = -ENOMEM; + } + + if (!errcode) { + int i; + + for (i = 0; i < *nwname; i++) { + errcode = + p9pdu_readf(pdu, optional, + "s", + &(*wnames)[i]); + if (errcode) + break; + } + } + + if (errcode) { + if (*wnames) { + int i; + + for (i = 0; i < *nwname; i++) + kfree((*wnames)[i]); + } + kfree(*wnames); + *wnames = NULL; + } + } + break; + case 'R':{ + int16_t *nwqid = va_arg(ap, int16_t *); + struct p9_qid **wqids = + va_arg(ap, struct p9_qid **); + + *wqids = NULL; + + errcode = + p9pdu_readf(pdu, optional, "w", nwqid); + if (!errcode) { + *wqids = + kmalloc(*nwqid * + sizeof(struct p9_qid), + GFP_KERNEL); + if (*wqids == NULL) + errcode = -ENOMEM; + } + + if (!errcode) { + int i; + + for (i = 0; i < *nwqid; i++) { + errcode = + p9pdu_readf(pdu, optional, + "Q", + &(*wqids)[i]); + if (errcode) + break; + } + } + + if (errcode) { + kfree(*wqids); + *wqids = NULL; + } + } + break; + case '?': + if (!optional) + return 0; + break; + default: + BUG(); + break; + } + + if (errcode) + break; + } + + return errcode; +} + 
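The p9pdu_vreadf()/p9pdu_vwritef() pair introduced here replaces the per-message constructors that conv.c used to provide: every 9P message body is now packed or unpacked by walking a printf-style format string built from the characters listed above. As a minimal, hypothetical sketch (not part of the patch), the exported p9pdu_readf() could unpack an Rversion reply much the way p9stat_read() later in this file wraps a raw buffer in a temporary struct p9_fcall; demo_read_rversion() and its hard-coded 7-byte header skip are illustration-only assumptions, and the reply layout used (msize[4] version[s] after the size[4] id[1] tag[2] header) is the standard 9P Rversion framing.

/*
 * Hypothetical example, not part of the patch: unpack the body of an
 * Rversion reply with the new format-string reader.  'd' pulls a
 * little-endian int32 and 's' returns a freshly kmalloc'd,
 * NUL-terminated copy of the string, which the caller must free.
 */
static int demo_read_rversion(char *buf, int len, int dotu)
{
	struct p9_fcall fake_pdu;
	int32_t msize;
	char *version = NULL;
	int err;

	fake_pdu.size = len;
	fake_pdu.capacity = len;
	fake_pdu.sdata = buf;
	fake_pdu.offset = 7;	/* skip the size[4] id[1] tag[2] header */

	err = p9pdu_readf(&fake_pdu, dotu, "ds", &msize, &version);
	if (err) {
		p9pdu_dump(1, &fake_pdu);
		return err;
	}

	P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d version %s\n",
			msize, version);
	kfree(version);
	return 0;
}

The same format characters drive the write path in p9pdu_vwritef(), which follows; the only asymmetry is the extra 'U' verb for copying data from user space.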
+int +p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +{ + const char *ptr; + int errcode = 0; + + for (ptr = fmt; *ptr; ptr++) { + switch (*ptr) { + case 'b':{ + int8_t val = va_arg(ap, int); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 'w':{ + int16_t val = va_arg(ap, int); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 'd':{ + int32_t val = va_arg(ap, int32_t); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 'q':{ + int64_t val = va_arg(ap, int64_t); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 's':{ + const char *ptr = va_arg(ap, const char *); + int16_t len = 0; + if (ptr) + len = MIN(strlen(ptr), USHORT_MAX); + + errcode = p9pdu_writef(pdu, optional, "w", len); + if (!errcode && pdu_write(pdu, ptr, len)) + errcode = -EFAULT; + } + break; + case 'Q':{ + const struct p9_qid *qid = + va_arg(ap, const struct p9_qid *); + errcode = + p9pdu_writef(pdu, optional, "bdq", + qid->type, qid->version, + qid->path); + } break; + case 'S':{ + const struct p9_wstat *stbuf = + va_arg(ap, const struct p9_wstat *); + errcode = + p9pdu_writef(pdu, optional, + "wwdQdddqssss?sddd", + stbuf->size, stbuf->type, + stbuf->dev, &stbuf->qid, + stbuf->mode, stbuf->atime, + stbuf->mtime, stbuf->length, + stbuf->name, stbuf->uid, + stbuf->gid, stbuf->muid, + stbuf->extension, stbuf->n_uid, + stbuf->n_gid, stbuf->n_muid); + } break; + case 'D':{ + int32_t count = va_arg(ap, int32_t); + const void *data = va_arg(ap, const void *); + + errcode = + p9pdu_writef(pdu, optional, "d", count); + if (!errcode && pdu_write(pdu, data, count)) + errcode = -EFAULT; + } + break; + case 'U':{ + int32_t count = va_arg(ap, int32_t); + const char __user *udata = + va_arg(ap, const void *); + errcode = + p9pdu_writef(pdu, optional, "d", count); + if (!errcode && pdu_write_u(pdu, udata, count)) + errcode = -EFAULT; + } + break; + case 'T':{ + int16_t nwname = va_arg(ap, int); + const char **wnames = va_arg(ap, const char **); + + errcode = + p9pdu_writef(pdu, optional, "w", nwname); + if (!errcode) { + int i; + + for (i = 0; i < nwname; i++) { + errcode = + p9pdu_writef(pdu, optional, + "s", + wnames[i]); + if (errcode) + break; + } + } + } + break; + case 'R':{ + int16_t nwqid = va_arg(ap, int); + struct p9_qid *wqids = + va_arg(ap, struct p9_qid *); + + errcode = + p9pdu_writef(pdu, optional, "w", nwqid); + if (!errcode) { + int i; + + for (i = 0; i < nwqid; i++) { + errcode = + p9pdu_writef(pdu, optional, + "Q", + &wqids[i]); + if (errcode) + break; + } + } + } + break; + case '?': + if (!optional) + return 0; + break; + default: + BUG(); + break; + } + + if (errcode) + break; + } + + return errcode; +} + +int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = p9pdu_vreadf(pdu, optional, fmt, ap); + va_end(ap); + + return ret; +} + +static int +p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...) 
+{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = p9pdu_vwritef(pdu, optional, fmt, ap); + va_end(ap); + + return ret; +} + +int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu) +{ + struct p9_fcall fake_pdu; + int ret; + + fake_pdu.size = len; + fake_pdu.capacity = len; + fake_pdu.sdata = buf; + fake_pdu.offset = 0; + + ret = p9pdu_readf(&fake_pdu, dotu, "S", st); + if (ret) { + P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); + p9pdu_dump(1, &fake_pdu); + } + + return ret; +} +EXPORT_SYMBOL(p9stat_read); + +int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) +{ + return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); +} + +int p9pdu_finalize(struct p9_fcall *pdu) +{ + int size = pdu->size; + int err; + + pdu->size = 0; + err = p9pdu_writef(pdu, 0, "d", size); + pdu->size = size; + + if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT) + p9pdu_dump(0, pdu); + + P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size, + pdu->id, pdu->tag); + + return err; +} + +void p9pdu_reset(struct p9_fcall *pdu) +{ + pdu->offset = 0; + pdu->size = 0; +} diff --git a/net/9p/protocol.h b/net/9p/protocol.h new file mode 100644 index 00000000000..ccde462e7ac --- /dev/null +++ b/net/9p/protocol.h @@ -0,0 +1,34 @@ +/* + * net/9p/protocol.h + * + * 9P Protocol Support Code + * + * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> + * + * Base on code from Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2008 by IBM, Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +int +p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap); +int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...); +int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); +int p9pdu_finalize(struct p9_fcall *pdu); +void p9pdu_dump(int, struct p9_fcall *); +void p9pdu_reset(struct p9_fcall *pdu); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 6dabbdb6665..be65d8242fd 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -39,12 +39,11 @@ #include <linux/file.h> #include <linux/parser.h> #include <net/9p/9p.h> +#include <net/9p/client.h> #include <net/9p/transport.h> #define P9_PORT 564 #define MAX_SOCK_BUF (64*1024) -#define ERREQFLUSH 1 -#define SCHED_TIMEOUT 10 #define MAXPOLLWADDR 2 /** @@ -61,7 +60,6 @@ struct p9_fd_opts { u16 port; }; - /** * struct p9_trans_fd - transport state * @rd: reference to file to read from @@ -100,60 +98,22 @@ enum { Wpending = 8, /* can write */ }; -enum { - None, - Flushing, - Flushed, -}; - -struct p9_req; -typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a); - -/** - * struct p9_req - fd mux encoding of an rpc transaction - * @lock: protects req_list - * @tag: numeric tag for rpc transaction - * @tcall: request &p9_fcall structure - * @rcall: response &p9_fcall structure - * @err: error state - * @cb: callback for when response is received - * @cba: argument to pass to callback - * @flush: flag to indicate RPC has been flushed - * @req_list: list link for higher level objects to chain requests - * - */ - -struct p9_req { - spinlock_t lock; - int tag; - struct p9_fcall *tcall; - struct p9_fcall *rcall; - int err; - p9_conn_req_callback cb; - void *cba; - int flush; - struct list_head req_list; -}; - -struct p9_mux_poll_task { - struct task_struct *task; - struct list_head mux_list; - int muxnum; +struct p9_poll_wait { + struct p9_conn *conn; + wait_queue_t wait; + wait_queue_head_t *wait_addr; }; /** * struct p9_conn - fd mux connection state information - * @lock: protects mux_list (?) * @mux_list: list link for mux to manage multiple connections (?) 
- * @poll_task: task polling on this connection - * @msize: maximum size for connection (dup) - * @extended: 9p2000.u flag (dup) - * @trans: reference to transport instance for this connection - * @tagpool: id accounting for transactions + * @client: reference to client instance for this connection * @err: error state * @req_list: accounting for requests which have been sent * @unsent_req_list: accounting for requests that haven't been sent - * @rcall: current response &p9_fcall structure + * @req: current request being processed (if any) + * @tmp_buf: temporary buffer to read in header + * @rsize: amount to read for current frame * @rpos: read position in current frame * @rbuf: current read buffer * @wpos: write position for current frame @@ -169,409 +129,300 @@ struct p9_mux_poll_task { */ struct p9_conn { - spinlock_t lock; /* protect lock structure */ struct list_head mux_list; - struct p9_mux_poll_task *poll_task; - int msize; - unsigned char extended; - struct p9_trans *trans; - struct p9_idpool *tagpool; + struct p9_client *client; int err; struct list_head req_list; struct list_head unsent_req_list; - struct p9_fcall *rcall; + struct p9_req_t *req; + char tmp_buf[7]; + int rsize; int rpos; char *rbuf; int wpos; int wsize; char *wbuf; - wait_queue_t poll_wait[MAXPOLLWADDR]; - wait_queue_head_t *poll_waddr[MAXPOLLWADDR]; + struct list_head poll_pending_link; + struct p9_poll_wait poll_wait[MAXPOLLWADDR]; poll_table pt; struct work_struct rq; struct work_struct wq; unsigned long wsched; }; -/** - * struct p9_mux_rpc - fd mux rpc accounting structure - * @m: connection this request was issued on - * @err: error state - * @tcall: request &p9_fcall - * @rcall: response &p9_fcall - * @wqueue: wait queue that client is blocked on for this rpc - * - * Bug: isn't this information duplicated elsewhere like &p9_req - */ - -struct p9_mux_rpc { - struct p9_conn *m; - int err; - struct p9_fcall *tcall; - struct p9_fcall *rcall; - wait_queue_head_t wqueue; -}; - -static int p9_poll_proc(void *); -static void p9_read_work(struct work_struct *work); -static void p9_write_work(struct work_struct *work); -static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, - poll_table *p); -static int p9_fd_write(struct p9_trans *trans, void *v, int len); -static int p9_fd_read(struct p9_trans *trans, void *v, int len); - -static DEFINE_MUTEX(p9_mux_task_lock); +static DEFINE_SPINLOCK(p9_poll_lock); +static LIST_HEAD(p9_poll_pending_list); static struct workqueue_struct *p9_mux_wq; +static struct task_struct *p9_poll_task; -static int p9_mux_num; -static int p9_mux_poll_task_num; -static struct p9_mux_poll_task p9_mux_poll_tasks[100]; - -static void p9_conn_destroy(struct p9_conn *); -static unsigned int p9_fd_poll(struct p9_trans *trans, - struct poll_table_struct *pt); - -#ifdef P9_NONBLOCK -static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, - p9_conn_req_callback cb, void *a); -#endif /* P9_NONBLOCK */ - -static void p9_conn_cancel(struct p9_conn *m, int err); - -static u16 p9_mux_get_tag(struct p9_conn *m) +static void p9_mux_poll_stop(struct p9_conn *m) { - int tag; + unsigned long flags; + int i; - tag = p9_idpool_get(m->tagpool); - if (tag < 0) - return P9_NOTAG; - else - return (u16) tag; -} + for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { + struct p9_poll_wait *pwait = &m->poll_wait[i]; -static void p9_mux_put_tag(struct p9_conn *m, u16 tag) -{ - if (tag != P9_NOTAG && p9_idpool_check(tag, m->tagpool)) - p9_idpool_put(tag, m->tagpool); + if (pwait->wait_addr) { + 
remove_wait_queue(pwait->wait_addr, &pwait->wait); + pwait->wait_addr = NULL; + } + } + + spin_lock_irqsave(&p9_poll_lock, flags); + list_del_init(&m->poll_pending_link); + spin_unlock_irqrestore(&p9_poll_lock, flags); } /** - * p9_mux_calc_poll_procs - calculates the number of polling procs - * @muxnum: number of mounts + * p9_conn_cancel - cancel all pending requests with error + * @m: mux data + * @err: error code * - * Calculation is based on the number of mounted v9fs filesystems. - * The current implementation returns sqrt of the number of mounts. */ -static int p9_mux_calc_poll_procs(int muxnum) +static void p9_conn_cancel(struct p9_conn *m, int err) { - int n; - - if (p9_mux_poll_task_num) - n = muxnum / p9_mux_poll_task_num + - (muxnum % p9_mux_poll_task_num ? 1 : 0); - else - n = 1; - - if (n > ARRAY_SIZE(p9_mux_poll_tasks)) - n = ARRAY_SIZE(p9_mux_poll_tasks); - - return n; -} + struct p9_req_t *req, *rtmp; + unsigned long flags; + LIST_HEAD(cancel_list); -static int p9_mux_poll_start(struct p9_conn *m) -{ - int i, n; - struct p9_mux_poll_task *vpt, *vptlast; - struct task_struct *pproc; - - P9_DPRINTK(P9_DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, p9_mux_num, - p9_mux_poll_task_num); - mutex_lock(&p9_mux_task_lock); - - n = p9_mux_calc_poll_procs(p9_mux_num + 1); - if (n > p9_mux_poll_task_num) { - for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) { - if (p9_mux_poll_tasks[i].task == NULL) { - vpt = &p9_mux_poll_tasks[i]; - P9_DPRINTK(P9_DEBUG_MUX, "create proc %p\n", - vpt); - pproc = kthread_create(p9_poll_proc, vpt, - "v9fs-poll"); - - if (!IS_ERR(pproc)) { - vpt->task = pproc; - INIT_LIST_HEAD(&vpt->mux_list); - vpt->muxnum = 0; - p9_mux_poll_task_num++; - wake_up_process(vpt->task); - } - break; - } - } + P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); - if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) - P9_DPRINTK(P9_DEBUG_ERROR, - "warning: no free poll slots\n"); - } + spin_lock_irqsave(&m->client->lock, flags); - n = (p9_mux_num + 1) / p9_mux_poll_task_num + - ((p9_mux_num + 1) % p9_mux_poll_task_num ? 
1 : 0); - - vptlast = NULL; - for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) { - vpt = &p9_mux_poll_tasks[i]; - if (vpt->task != NULL) { - vptlast = vpt; - if (vpt->muxnum < n) { - P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i); - list_add(&m->mux_list, &vpt->mux_list); - vpt->muxnum++; - m->poll_task = vpt; - memset(&m->poll_waddr, 0, - sizeof(m->poll_waddr)); - init_poll_funcptr(&m->pt, p9_pollwait); - break; - } - } + if (m->err) { + spin_unlock_irqrestore(&m->client->lock, flags); + return; } - if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) { - if (vptlast == NULL) { - mutex_unlock(&p9_mux_task_lock); - return -ENOMEM; - } + m->err = err; - P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i); - list_add(&m->mux_list, &vptlast->mux_list); - vptlast->muxnum++; - m->poll_task = vptlast; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - init_poll_funcptr(&m->pt, p9_pollwait); + list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { + req->status = REQ_STATUS_ERROR; + if (!req->t_err) + req->t_err = err; + list_move(&req->req_list, &cancel_list); } - - p9_mux_num++; - mutex_unlock(&p9_mux_task_lock); - - return 0; -} - -static void p9_mux_poll_stop(struct p9_conn *m) -{ - int i; - struct p9_mux_poll_task *vpt; - - mutex_lock(&p9_mux_task_lock); - vpt = m->poll_task; - list_del(&m->mux_list); - for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { - if (m->poll_waddr[i] != NULL) { - remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]); - m->poll_waddr[i] = NULL; - } + list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { + req->status = REQ_STATUS_ERROR; + if (!req->t_err) + req->t_err = err; + list_move(&req->req_list, &cancel_list); } - vpt->muxnum--; - if (!vpt->muxnum) { - P9_DPRINTK(P9_DEBUG_MUX, "destroy proc %p\n", vpt); - kthread_stop(vpt->task); - vpt->task = NULL; - p9_mux_poll_task_num--; + spin_unlock_irqrestore(&m->client->lock, flags); + + list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { + list_del(&req->req_list); + P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req); + p9_client_cb(m->client, req); } - p9_mux_num--; - mutex_unlock(&p9_mux_task_lock); } -/** - * p9_conn_create - allocate and initialize the per-session mux data - * @trans: transport structure - * - * Note: Creates the polling task if this is the first session. 
- */ - -static struct p9_conn *p9_conn_create(struct p9_trans *trans) +static unsigned int +p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt) { - int i, n; - struct p9_conn *m; + int ret, n; + struct p9_trans_fd *ts = NULL; - P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans, - trans->msize); - m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); - if (!m) - return ERR_PTR(-ENOMEM); + if (client && client->status == Connected) + ts = client->trans; - spin_lock_init(&m->lock); - INIT_LIST_HEAD(&m->mux_list); - m->msize = trans->msize; - m->extended = trans->extended; - m->trans = trans; - m->tagpool = p9_idpool_create(); - if (IS_ERR(m->tagpool)) { - kfree(m); - return ERR_PTR(-ENOMEM); - } + if (!ts) + return -EREMOTEIO; - INIT_LIST_HEAD(&m->req_list); - INIT_LIST_HEAD(&m->unsent_req_list); - INIT_WORK(&m->rq, p9_read_work); - INIT_WORK(&m->wq, p9_write_work); - n = p9_mux_poll_start(m); - if (n) { - kfree(m); - return ERR_PTR(n); - } + if (!ts->rd->f_op || !ts->rd->f_op->poll) + return -EIO; - n = p9_fd_poll(trans, &m->pt); - if (n & POLLIN) { - P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m); - set_bit(Rpending, &m->wsched); - } + if (!ts->wr->f_op || !ts->wr->f_op->poll) + return -EIO; - if (n & POLLOUT) { - P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m); - set_bit(Wpending, &m->wsched); - } + ret = ts->rd->f_op->poll(ts->rd, pt); + if (ret < 0) + return ret; - for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { - if (IS_ERR(m->poll_waddr[i])) { - p9_mux_poll_stop(m); - kfree(m); - return (void *)m->poll_waddr; /* the error code */ - } + if (ts->rd != ts->wr) { + n = ts->wr->f_op->poll(ts->wr, pt); + if (n < 0) + return n; + ret = (ret & ~POLLOUT) | (n & ~POLLIN); } - return m; + return ret; } /** - * p9_mux_destroy - cancels all pending requests and frees mux resources - * @m: mux to destroy + * p9_fd_read- read from a fd + * @client: client instance + * @v: buffer to receive data into + * @len: size of receive buffer * */ -static void p9_conn_destroy(struct p9_conn *m) +static int p9_fd_read(struct p9_client *client, void *v, int len) { - P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m, - m->mux_list.prev, m->mux_list.next); + int ret; + struct p9_trans_fd *ts = NULL; - p9_mux_poll_stop(m); - cancel_work_sync(&m->rq); - cancel_work_sync(&m->wq); + if (client && client->status != Disconnected) + ts = client->trans; - p9_conn_cancel(m, -ECONNRESET); + if (!ts) + return -EREMOTEIO; - m->trans = NULL; - p9_idpool_destroy(m->tagpool); - kfree(m); + if (!(ts->rd->f_flags & O_NONBLOCK)) + P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n"); + + ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); + if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) + client->status = Disconnected; + return ret; } /** - * p9_pollwait - add poll task to the wait queue - * @filp: file pointer being polled - * @wait_address: wait_q to block on - * @p: poll state + * p9_read_work - called when there is some data to be read from a transport + * @work: container of work to be done * - * called by files poll operation to add v9fs-poll task to files wait queue */ -static void -p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) +static void p9_read_work(struct work_struct *work) { - int i; + int n, err; struct p9_conn *m; - m = container_of(p, struct p9_conn, pt); - for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) - if (m->poll_waddr[i] == NULL) - break; + m = container_of(work, struct p9_conn, rq); - if (i >= ARRAY_SIZE(m->poll_waddr)) { - 
P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); + if (m->err < 0) return; - } - m->poll_waddr[i] = wait_address; + P9_DPRINTK(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos); - if (!wait_address) { - P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n"); - m->poll_waddr[i] = ERR_PTR(-EIO); + if (!m->rbuf) { + m->rbuf = m->tmp_buf; + m->rpos = 0; + m->rsize = 7; /* start by reading header */ + } + + clear_bit(Rpending, &m->wsched); + P9_DPRINTK(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", m, + m->rpos, m->rsize, m->rsize-m->rpos); + err = p9_fd_read(m->client, m->rbuf + m->rpos, + m->rsize - m->rpos); + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); + if (err == -EAGAIN) { + clear_bit(Rworksched, &m->wsched); return; } - init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task); - add_wait_queue(wait_address, &m->poll_wait[i]); -} + if (err <= 0) + goto error; -/** - * p9_poll_mux - polls a mux and schedules read or write works if necessary - * @m: connection to poll - * - */ + m->rpos += err; -static void p9_poll_mux(struct p9_conn *m) -{ - int n; + if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */ + u16 tag; + P9_DPRINTK(P9_DEBUG_TRANS, "got new header\n"); - if (m->err < 0) - return; + n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */ + if (n >= m->client->msize) { + P9_DPRINTK(P9_DEBUG_ERROR, + "requested packet size too big: %d\n", n); + err = -EIO; + goto error; + } - n = p9_fd_poll(m->trans, NULL); - if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { - P9_DPRINTK(P9_DEBUG_MUX, "error mux %p err %d\n", m, n); - if (n >= 0) - n = -ECONNRESET; - p9_conn_cancel(m, n); - } + tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */ + P9_DPRINTK(P9_DEBUG_TRANS, + "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); - if (n & POLLIN) { - set_bit(Rpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m); - if (!test_and_set_bit(Rworksched, &m->wsched)) { - P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); + m->req = p9_tag_lookup(m->client, tag); + if (!m->req) { + P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", + tag); + err = -EIO; + goto error; } - } - if (n & POLLOUT) { - set_bit(Wpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m); - if ((m->wsize || !list_empty(&m->unsent_req_list)) - && !test_and_set_bit(Wworksched, &m->wsched)) { - P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m); - queue_work(p9_mux_wq, &m->wq); + if (m->req->rc == NULL) { + m->req->rc = kmalloc(sizeof(struct p9_fcall) + + m->client->msize, GFP_KERNEL); + if (!m->req->rc) { + m->req = NULL; + err = -ENOMEM; + goto error; + } } + m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall); + memcpy(m->rbuf, m->tmp_buf, m->rsize); + m->rsize = n; } + + /* not an else because some packets (like clunk) have no payload */ + if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */ + P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n"); + spin_lock(&m->client->lock); + list_del(&m->req->req_list); + spin_unlock(&m->client->lock); + p9_client_cb(m->client, m->req); + + m->rbuf = NULL; + m->rpos = 0; + m->rsize = 0; + m->req = NULL; + } + + if (!list_empty(&m->req_list)) { + if (test_and_clear_bit(Rpending, &m->wsched)) + n = POLLIN; + else + n = p9_fd_poll(m->client, NULL); + + if (n & POLLIN) { + P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); + queue_work(p9_mux_wq, &m->rq); + } else + clear_bit(Rworksched, &m->wsched); + } else + clear_bit(Rworksched, &m->wsched); 
+ + return; +error: + p9_conn_cancel(m, err); + clear_bit(Rworksched, &m->wsched); } /** - * p9_poll_proc - poll worker thread - * @a: thread state and arguments - * - * polls all v9fs transports for new events and queues the appropriate - * work to the work queue + * p9_fd_write - write to a socket + * @client: client instance + * @v: buffer to send data from + * @len: size of send buffer * */ -static int p9_poll_proc(void *a) +static int p9_fd_write(struct p9_client *client, void *v, int len) { - struct p9_conn *m, *mtmp; - struct p9_mux_poll_task *vpt; + int ret; + mm_segment_t oldfs; + struct p9_trans_fd *ts = NULL; - vpt = a; - P9_DPRINTK(P9_DEBUG_MUX, "start %p %p\n", current, vpt); - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); + if (client && client->status != Disconnected) + ts = client->trans; - list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) { - p9_poll_mux(m); - } + if (!ts) + return -EREMOTEIO; - P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n"); - schedule_timeout(SCHED_TIMEOUT * HZ); - } + if (!(ts->wr->f_flags & O_NONBLOCK)) + P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n"); - __set_current_state(TASK_RUNNING); - P9_DPRINTK(P9_DEBUG_MUX, "finish\n"); - return 0; + oldfs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos); + set_fs(oldfs); + + if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) + client->status = Disconnected; + return ret; } /** @@ -584,7 +435,7 @@ static void p9_write_work(struct work_struct *work) { int n, err; struct p9_conn *m; - struct p9_req *req; + struct p9_req_t *req; m = container_of(work, struct p9_conn, wq); @@ -599,25 +450,23 @@ static void p9_write_work(struct work_struct *work) return; } - spin_lock(&m->lock); -again: - req = list_entry(m->unsent_req_list.next, struct p9_req, + spin_lock(&m->client->lock); + req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); + req->status = REQ_STATUS_SENT; list_move_tail(&req->req_list, &m->req_list); - if (req->err == ERREQFLUSH) - goto again; - m->wbuf = req->tcall->sdata; - m->wsize = req->tcall->size; + m->wbuf = req->tc->sdata; + m->wsize = req->tc->size; m->wpos = 0; - spin_unlock(&m->lock); + spin_unlock(&m->client->lock); } - P9_DPRINTK(P9_DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos, m->wsize); clear_bit(Wpending, &m->wsched); - err = p9_fd_write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p sent %d bytes\n", m, err); + err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); if (err == -EAGAIN) { clear_bit(Wworksched, &m->wsched); return; @@ -638,10 +487,10 @@ again: if (test_and_clear_bit(Wpending, &m->wsched)) n = POLLOUT; else - n = p9_fd_poll(m->trans, NULL); + n = p9_fd_poll(m->client, NULL); if (n & POLLOUT) { - P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m); + P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); queue_work(p9_mux_wq, &m->wq); } else clear_bit(Wworksched, &m->wsched); @@ -655,504 +504,197 @@ error: clear_bit(Wworksched, &m->wsched); } -static void process_request(struct p9_conn *m, struct p9_req *req) +static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) { - int ecode; - struct p9_str *ename; - - if (!req->err && req->rcall->id == P9_RERROR) { - ecode = req->rcall->params.rerror.errno; - 
ename = &req->rcall->params.rerror.error; - - P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len, - ename->str); - - if (m->extended) - req->err = -ecode; + struct p9_poll_wait *pwait = + container_of(wait, struct p9_poll_wait, wait); + struct p9_conn *m = pwait->conn; + unsigned long flags; + DECLARE_WAITQUEUE(dummy_wait, p9_poll_task); - if (!req->err) { - req->err = p9_errstr2errno(ename->str, ename->len); + spin_lock_irqsave(&p9_poll_lock, flags); + if (list_empty(&m->poll_pending_link)) + list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); + spin_unlock_irqrestore(&p9_poll_lock, flags); - /* string match failed */ - if (!req->err) { - PRINT_FCALL_ERROR("unknown error", req->rcall); - req->err = -ESERVERFAULT; - } - } - } else if (req->tcall && req->rcall->id != req->tcall->id + 1) { - P9_DPRINTK(P9_DEBUG_ERROR, - "fcall mismatch: expected %d, got %d\n", - req->tcall->id + 1, req->rcall->id); - if (!req->err) - req->err = -EIO; - } + /* perform the default wake up operation */ + return default_wake_function(&dummy_wait, mode, sync, key); } /** - * p9_read_work - called when there is some data to be read from a transport - * @work: container of work to be done + * p9_pollwait - add poll task to the wait queue + * @filp: file pointer being polled + * @wait_address: wait_q to block on + * @p: poll state * + * called by files poll operation to add v9fs-poll task to files wait queue */ -static void p9_read_work(struct work_struct *work) +static void +p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { - int n, err; - struct p9_conn *m; - struct p9_req *req, *rptr, *rreq; - struct p9_fcall *rcall; - char *rbuf; - - m = container_of(work, struct p9_conn, rq); - - if (m->err < 0) - return; - - rcall = NULL; - P9_DPRINTK(P9_DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos); + struct p9_conn *m = container_of(p, struct p9_conn, pt); + struct p9_poll_wait *pwait = NULL; + int i; - if (!m->rcall) { - m->rcall = - kmalloc(sizeof(struct p9_fcall) + m->msize, GFP_KERNEL); - if (!m->rcall) { - err = -ENOMEM; - goto error; + for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { + if (m->poll_wait[i].wait_addr == NULL) { + pwait = &m->poll_wait[i]; + break; } - - m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall); - m->rpos = 0; } - clear_bit(Rpending, &m->wsched); - err = p9_fd_read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err); - if (err == -EAGAIN) { - clear_bit(Rworksched, &m->wsched); + if (!pwait) { + P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); return; } - if (err <= 0) - goto error; - - m->rpos += err; - while (m->rpos > 4) { - n = le32_to_cpu(*(__le32 *) m->rbuf); - if (n >= m->msize) { - P9_DPRINTK(P9_DEBUG_ERROR, - "requested packet size too big: %d\n", n); - err = -EIO; - goto error; - } - - if (m->rpos < n) - break; - - err = - p9_deserialize_fcall(m->rbuf, n, m->rcall, m->extended); - if (err < 0) - goto error; - -#ifdef CONFIG_NET_9P_DEBUG - if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { - char buf[150]; - - p9_printfcall(buf, sizeof(buf), m->rcall, - m->extended); - printk(KERN_NOTICE ">>> %p %s\n", m, buf); - } -#endif - - rcall = m->rcall; - rbuf = m->rbuf; - if (m->rpos > n) { - m->rcall = kmalloc(sizeof(struct p9_fcall) + m->msize, - GFP_KERNEL); - if (!m->rcall) { - err = -ENOMEM; - goto error; - } - - m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall); - memmove(m->rbuf, rbuf + n, m->rpos - n); - m->rpos -= n; - } else { - m->rcall = NULL; - m->rbuf = 
NULL; - m->rpos = 0; - } - - P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, - rcall->id, rcall->tag); - - req = NULL; - spin_lock(&m->lock); - list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { - if (rreq->tag == rcall->tag) { - req = rreq; - if (req->flush != Flushing) - list_del(&req->req_list); - break; - } - } - spin_unlock(&m->lock); - - if (req) { - req->rcall = rcall; - process_request(m, req); - - if (req->flush != Flushing) { - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); - } - } else { - if (err >= 0 && rcall->id != P9_RFLUSH) - P9_DPRINTK(P9_DEBUG_ERROR, - "unexpected response mux %p id %d tag %d\n", - m, rcall->id, rcall->tag); - kfree(rcall); - } - } - - if (!list_empty(&m->req_list)) { - if (test_and_clear_bit(Rpending, &m->wsched)) - n = POLLIN; - else - n = p9_fd_poll(m->trans, NULL); - - if (n & POLLIN) { - P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); - } else - clear_bit(Rworksched, &m->wsched); - } else - clear_bit(Rworksched, &m->wsched); - - return; - -error: - p9_conn_cancel(m, err); - clear_bit(Rworksched, &m->wsched); + pwait->conn = m; + pwait->wait_addr = wait_address; + init_waitqueue_func_entry(&pwait->wait, p9_pollwake); + add_wait_queue(wait_address, &pwait->wait); } /** - * p9_send_request - send 9P request - * The function can sleep until the request is scheduled for sending. - * The function can be interrupted. Return from the function is not - * a guarantee that the request is sent successfully. Can return errors - * that can be retrieved by PTR_ERR macros. - * - * @m: mux data - * @tc: request to be sent - * @cb: callback function to call when response is received - * @cba: parameter to pass to the callback function + * p9_conn_create - allocate and initialize the per-session mux data + * @client: client instance * + * Note: Creates the polling task if this is the first session. 
*/ -static struct p9_req *p9_send_request(struct p9_conn *m, - struct p9_fcall *tc, - p9_conn_req_callback cb, void *cba) +static struct p9_conn *p9_conn_create(struct p9_client *client) { int n; - struct p9_req *req; - - P9_DPRINTK(P9_DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current, - tc, tc->id); - if (m->err < 0) - return ERR_PTR(m->err); - - req = kmalloc(sizeof(struct p9_req), GFP_KERNEL); - if (!req) - return ERR_PTR(-ENOMEM); - - if (tc->id == P9_TVERSION) - n = P9_NOTAG; - else - n = p9_mux_get_tag(m); + struct p9_conn *m; - if (n < 0) { - kfree(req); + P9_DPRINTK(P9_DEBUG_TRANS, "client %p msize %d\n", client, + client->msize); + m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); + if (!m) return ERR_PTR(-ENOMEM); - } - p9_set_tag(tc, n); + INIT_LIST_HEAD(&m->mux_list); + m->client = client; -#ifdef CONFIG_NET_9P_DEBUG - if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { - char buf[150]; + INIT_LIST_HEAD(&m->req_list); + INIT_LIST_HEAD(&m->unsent_req_list); + INIT_WORK(&m->rq, p9_read_work); + INIT_WORK(&m->wq, p9_write_work); + INIT_LIST_HEAD(&m->poll_pending_link); + init_poll_funcptr(&m->pt, p9_pollwait); - p9_printfcall(buf, sizeof(buf), tc, m->extended); - printk(KERN_NOTICE "<<< %p %s\n", m, buf); + n = p9_fd_poll(client, &m->pt); + if (n & POLLIN) { + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); + set_bit(Rpending, &m->wsched); } -#endif - - spin_lock_init(&req->lock); - req->tag = n; - req->tcall = tc; - req->rcall = NULL; - req->err = 0; - req->cb = cb; - req->cba = cba; - req->flush = None; - - spin_lock(&m->lock); - list_add_tail(&req->req_list, &m->unsent_req_list); - spin_unlock(&m->lock); - - if (test_and_clear_bit(Wpending, &m->wsched)) - n = POLLOUT; - else - n = p9_fd_poll(m->trans, NULL); - if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) - queue_work(p9_mux_wq, &m->wq); + if (n & POLLOUT) { + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); + set_bit(Wpending, &m->wsched); + } - return req; + return m; } -static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req) -{ - p9_mux_put_tag(m, req->tag); - kfree(req); -} +/** + * p9_poll_mux - polls a mux and schedules read or write works if necessary + * @m: connection to poll + * + */ -static void p9_mux_flush_cb(struct p9_req *freq, void *a) +static void p9_poll_mux(struct p9_conn *m) { - int tag; - struct p9_conn *m; - struct p9_req *req, *rreq, *rptr; - - m = a; - P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, - freq->tcall, freq->rcall, freq->err, - freq->tcall->params.tflush.oldtag); - - spin_lock(&m->lock); - tag = freq->tcall->params.tflush.oldtag; - req = NULL; - list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { - if (rreq->tag == tag) { - req = rreq; - list_del(&req->req_list); - break; - } - } - spin_unlock(&m->lock); + int n; - if (req) { - spin_lock(&req->lock); - req->flush = Flushed; - spin_unlock(&req->lock); + if (m->err < 0) + return; - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); + n = p9_fd_poll(m->client, NULL); + if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { + P9_DPRINTK(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); + if (n >= 0) + n = -ECONNRESET; + p9_conn_cancel(m, n); } - kfree(freq->tcall); - kfree(freq->rcall); - p9_mux_free_request(m, freq); -} - -static int -p9_mux_flush_request(struct p9_conn *m, struct p9_req *req) -{ - struct p9_fcall *fc; - struct p9_req *rreq, *rptr; - - P9_DPRINTK(P9_DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag); - - /* if a response was 
received for a request, do nothing */ - spin_lock(&req->lock); - if (req->rcall || req->err) { - spin_unlock(&req->lock); - P9_DPRINTK(P9_DEBUG_MUX, - "mux %p req %p response already received\n", m, req); - return 0; + if (n & POLLIN) { + set_bit(Rpending, &m->wsched); + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); + if (!test_and_set_bit(Rworksched, &m->wsched)) { + P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); + queue_work(p9_mux_wq, &m->rq); + } } - req->flush = Flushing; - spin_unlock(&req->lock); - - spin_lock(&m->lock); - /* if the request is not sent yet, just remove it from the list */ - list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) { - if (rreq->tag == req->tag) { - P9_DPRINTK(P9_DEBUG_MUX, - "mux %p req %p request is not sent yet\n", m, req); - list_del(&rreq->req_list); - req->flush = Flushed; - spin_unlock(&m->lock); - if (req->cb) - (*req->cb) (req, req->cba); - return 0; + if (n & POLLOUT) { + set_bit(Wpending, &m->wsched); + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); + if ((m->wsize || !list_empty(&m->unsent_req_list)) + && !test_and_set_bit(Wworksched, &m->wsched)) { + P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); + queue_work(p9_mux_wq, &m->wq); } } - spin_unlock(&m->lock); - - clear_thread_flag(TIF_SIGPENDING); - fc = p9_create_tflush(req->tag); - p9_send_request(m, fc, p9_mux_flush_cb, m); - return 1; -} - -static void -p9_conn_rpc_cb(struct p9_req *req, void *a) -{ - struct p9_mux_rpc *r; - - P9_DPRINTK(P9_DEBUG_MUX, "req %p r %p\n", req, a); - r = a; - r->rcall = req->rcall; - r->err = req->err; - - if (req->flush != None && !req->err) - r->err = -ERESTARTSYS; - - wake_up(&r->wqueue); } /** - * p9_fd_rpc- sends 9P request and waits until a response is available. - * The function can be interrupted. - * @t: transport data - * @tc: request to be sent - * @rc: pointer where a pointer to the response is stored + * p9_fd_request - send 9P request + * The function can sleep until the request is scheduled for sending. + * The function can be interrupted. Return from the function is not + * a guarantee that the request is sent successfully. 
+ * + * @client: client instance + * @req: request to be sent * */ -int -p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) +static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { - struct p9_trans_fd *p = t->priv; - struct p9_conn *m = p->conn; - int err, sigpending; - unsigned long flags; - struct p9_req *req; - struct p9_mux_rpc r; - - r.err = 0; - r.tcall = tc; - r.rcall = NULL; - r.m = m; - init_waitqueue_head(&r.wqueue); - - if (rc) - *rc = NULL; - - sigpending = 0; - if (signal_pending(current)) { - sigpending = 1; - clear_thread_flag(TIF_SIGPENDING); - } - - req = p9_send_request(m, tc, p9_conn_rpc_cb, &r); - if (IS_ERR(req)) { - err = PTR_ERR(req); - P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err); - return err; - } + int n; + struct p9_trans_fd *ts = client->trans; + struct p9_conn *m = ts->conn; - err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0); - if (r.err < 0) - err = r.err; - - if (err == -ERESTARTSYS && m->trans->status == Connected - && m->err == 0) { - if (p9_mux_flush_request(m, req)) { - /* wait until we get response of the flush message */ - do { - clear_thread_flag(TIF_SIGPENDING); - err = wait_event_interruptible(r.wqueue, - r.rcall || r.err); - } while (!r.rcall && !r.err && err == -ERESTARTSYS && - m->trans->status == Connected && !m->err); - - err = -ERESTARTSYS; - } - sigpending = 1; - } + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, + current, req->tc, req->tc->id); + if (m->err < 0) + return m->err; - if (sigpending) { - spin_lock_irqsave(&current->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(&current->sighand->siglock, flags); - } + spin_lock(&client->lock); + req->status = REQ_STATUS_UNSENT; + list_add_tail(&req->req_list, &m->unsent_req_list); + spin_unlock(&client->lock); - if (rc) - *rc = r.rcall; + if (test_and_clear_bit(Wpending, &m->wsched)) + n = POLLOUT; else - kfree(r.rcall); + n = p9_fd_poll(m->client, NULL); - p9_mux_free_request(m, req); - if (err > 0) - err = -EIO; + if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) + queue_work(p9_mux_wq, &m->wq); - return err; + return 0; } -#ifdef P9_NONBLOCK -/** - * p9_conn_rpcnb - sends 9P request without waiting for response.
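The new p9_fd_request() above no longer waits for the reply: it marks the request REQ_STATUS_UNSENT, queues it on the connection's unsent_req_list, and only kicks the write worker when the Wworksched bit was not already set, so at most one writer is scheduled per connection. A minimal userspace sketch of that test-and-set guard follows (C11 atomics, not kernel code; struct conn, submit() and queue_write_work() are invented names):

/* Only the caller that flips the bit from 0 to 1 schedules the worker,
 * so the worker is queued at most once per connection at a time. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct conn {
        atomic_uint wsched;             /* loosely models m->wsched */
};

#define WWORKSCHED (1u << 0)            /* stands in for the Wworksched bit */

static void queue_write_work(struct conn *c)
{
        printf("write work queued for %p\n", (void *)c);
}

static void submit(struct conn *c, bool pollout)
{
        unsigned int old;

        if (!pollout)
                return;
        /* analogue of !test_and_set_bit(Wworksched, &m->wsched) */
        old = atomic_fetch_or(&c->wsched, WWORKSCHED);
        if (!(old & WWORKSCHED))
                queue_write_work(c);    /* we won the race: schedule once */
}

int main(void)
{
        struct conn c = { .wsched = 0 };

        submit(&c, true);               /* schedules the worker */
        submit(&c, true);               /* bit already set: no duplicate */
        return 0;
}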
- * @m: mux data - * @tc: request to be sent - * @cb: callback function to be called when response arrives - * @a: value to pass to the callback function - * - */ - -int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, - p9_conn_req_callback cb, void *a) +static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) { - int err; - struct p9_req *req; + struct p9_trans_fd *ts = client->trans; + struct p9_conn *m = ts->conn; + int ret = 1; - req = p9_send_request(m, tc, cb, a); - if (IS_ERR(req)) { - err = PTR_ERR(req); - P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err); - return PTR_ERR(req); - } + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p req %p\n", m, req); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag); - return 0; -} -#endif /* P9_NONBLOCK */ + spin_lock(&client->lock); + list_del(&req->req_list); -/** - * p9_conn_cancel - cancel all pending requests with error - * @m: mux data - * @err: error code - * - */ - -void p9_conn_cancel(struct p9_conn *m, int err) -{ - struct p9_req *req, *rtmp; - LIST_HEAD(cancel_list); - - P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); - m->err = err; - spin_lock(&m->lock); - list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { - list_move(&req->req_list, &cancel_list); + if (req->status == REQ_STATUS_UNSENT) { + req->status = REQ_STATUS_FLSHD; + ret = 0; } - list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { - list_move(&req->req_list, &cancel_list); - } - spin_unlock(&m->lock); - list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { - list_del(&req->req_list); - if (!req->err) - req->err = err; + spin_unlock(&client->lock); - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); - } + return ret; } /** @@ -1216,7 +758,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) return 0; } -static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd) +static int p9_fd_open(struct p9_client *client, int rfd, int wfd) { struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); @@ -1234,13 +776,13 @@ static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd) return -EIO; } - trans->priv = ts; - trans->status = Connected; + client->trans = ts; + client->status = Connected; return 0; } -static int p9_socket_open(struct p9_trans *trans, struct socket *csocket) +static int p9_socket_open(struct p9_client *client, struct socket *csocket) { int fd, ret; @@ -1251,137 +793,65 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket) return fd; } - ret = p9_fd_open(trans, fd, fd); + ret = p9_fd_open(client, fd, fd); if (ret < 0) { P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to open fd\n"); sockfd_put(csocket); return ret; } - ((struct p9_trans_fd *)trans->priv)->rd->f_flags |= O_NONBLOCK; + ((struct p9_trans_fd *)client->trans)->rd->f_flags |= O_NONBLOCK; return 0; } /** - * p9_fd_read- read from a fd - * @trans: transport instance state - * @v: buffer to receive data into - * @len: size of receive buffer - * - */ - -static int p9_fd_read(struct p9_trans *trans, void *v, int len) -{ - int ret; - struct p9_trans_fd *ts = NULL; - - if (trans && trans->status != Disconnected) - ts = trans->priv; - - if (!ts) - return -EREMOTEIO; - - if (!(ts->rd->f_flags & O_NONBLOCK)) - P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n"); - - ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); - if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) - trans->status = Disconnected; - return ret; -} - -/** - * p9_fd_write - write to a socket - * @trans: 
transport instance state - * @v: buffer to send data from - * @len: size of send buffer + * p9_mux_destroy - cancels all pending requests and frees mux resources + * @m: mux to destroy * */ -static int p9_fd_write(struct p9_trans *trans, void *v, int len) -{ - int ret; - mm_segment_t oldfs; - struct p9_trans_fd *ts = NULL; - - if (trans && trans->status != Disconnected) - ts = trans->priv; - - if (!ts) - return -EREMOTEIO; - - if (!(ts->wr->f_flags & O_NONBLOCK)) - P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n"); - - oldfs = get_fs(); - set_fs(get_ds()); - /* The cast to a user pointer is valid due to the set_fs() */ - ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos); - set_fs(oldfs); - - if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) - trans->status = Disconnected; - return ret; -} - -static unsigned int -p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt) +static void p9_conn_destroy(struct p9_conn *m) { - int ret, n; - struct p9_trans_fd *ts = NULL; - - if (trans && trans->status == Connected) - ts = trans->priv; - - if (!ts) - return -EREMOTEIO; - - if (!ts->rd->f_op || !ts->rd->f_op->poll) - return -EIO; - - if (!ts->wr->f_op || !ts->wr->f_op->poll) - return -EIO; + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m, + m->mux_list.prev, m->mux_list.next); - ret = ts->rd->f_op->poll(ts->rd, pt); - if (ret < 0) - return ret; + p9_mux_poll_stop(m); + cancel_work_sync(&m->rq); + cancel_work_sync(&m->wq); - if (ts->rd != ts->wr) { - n = ts->wr->f_op->poll(ts->wr, pt); - if (n < 0) - return n; - ret = (ret & ~POLLOUT) | (n & ~POLLIN); - } + p9_conn_cancel(m, -ECONNRESET); - return ret; + m->client = NULL; + kfree(m); } /** - * p9_fd_close - shutdown socket - * @trans: private socket structure + * p9_fd_close - shutdown file descriptor transport + * @client: client instance * */ -static void p9_fd_close(struct p9_trans *trans) +static void p9_fd_close(struct p9_client *client) { struct p9_trans_fd *ts; - if (!trans) + if (!client) return; - ts = xchg(&trans->priv, NULL); - + ts = client->trans; if (!ts) return; + client->status = Disconnected; + p9_conn_destroy(ts->conn); - trans->status = Disconnected; if (ts->rd) fput(ts->rd); if (ts->wr) fput(ts->wr); + kfree(ts); } @@ -1402,31 +872,23 @@ static inline int valid_ipaddr4(const char *buf) return 0; } -static struct p9_trans * -p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu) +static int +p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { int err; - struct p9_trans *trans; struct socket *csocket; struct sockaddr_in sin_server; struct p9_fd_opts opts; - struct p9_trans_fd *p; + struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */ err = parse_opts(args, &opts); if (err < 0) - return ERR_PTR(err); + return err; if (valid_ipaddr4(addr) < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; csocket = NULL; - trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); - trans->msize = msize; - trans->extended = dotu; - trans->rpc = p9_fd_rpc; - trans->close = p9_fd_close; sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); @@ -1449,45 +911,38 @@ p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu) goto error; } - err = p9_socket_open(trans, csocket); + err = p9_socket_open(client, csocket); if (err < 0) goto error; - p = (struct p9_trans_fd *) trans->priv; - p->conn = p9_conn_create(trans); + p = (struct p9_trans_fd *) client->trans; + p->conn = 
p9_conn_create(client); if (IS_ERR(p->conn)) { err = PTR_ERR(p->conn); p->conn = NULL; goto error; } - return trans; + return 0; error: if (csocket) sock_release(csocket); - kfree(trans); - return ERR_PTR(err); + kfree(p); + + return err; } -static struct p9_trans * -p9_trans_create_unix(const char *addr, char *args, int msize, - unsigned char dotu) +static int +p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) { int err; struct socket *csocket; struct sockaddr_un sun_server; - struct p9_trans *trans; - struct p9_trans_fd *p; + struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */ csocket = NULL; - trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); - - trans->rpc = p9_fd_rpc; - trans->close = p9_fd_close; if (strlen(addr) > UNIX_PATH_MAX) { P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", @@ -1508,79 +963,69 @@ p9_trans_create_unix(const char *addr, char *args, int msize, goto error; } - err = p9_socket_open(trans, csocket); + err = p9_socket_open(client, csocket); if (err < 0) goto error; - trans->msize = msize; - trans->extended = dotu; - p = (struct p9_trans_fd *) trans->priv; - p->conn = p9_conn_create(trans); + p = (struct p9_trans_fd *) client->trans; + p->conn = p9_conn_create(client); if (IS_ERR(p->conn)) { err = PTR_ERR(p->conn); p->conn = NULL; goto error; } - return trans; + return 0; error: if (csocket) sock_release(csocket); - kfree(trans); - return ERR_PTR(err); + kfree(p); + return err; } -static struct p9_trans * -p9_trans_create_fd(const char *name, char *args, int msize, - unsigned char extended) +static int +p9_fd_create(struct p9_client *client, const char *addr, char *args) { int err; - struct p9_trans *trans; struct p9_fd_opts opts; - struct p9_trans_fd *p; + struct p9_trans_fd *p = NULL; /* this get allocated in p9_fd_open */ parse_opts(args, &opts); if (opts.rfd == ~0 || opts.wfd == ~0) { printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); - return ERR_PTR(-ENOPROTOOPT); + return -ENOPROTOOPT; } - trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); - - trans->rpc = p9_fd_rpc; - trans->close = p9_fd_close; - - err = p9_fd_open(trans, opts.rfd, opts.wfd); + err = p9_fd_open(client, opts.rfd, opts.wfd); if (err < 0) goto error; - trans->msize = msize; - trans->extended = extended; - p = (struct p9_trans_fd *) trans->priv; - p->conn = p9_conn_create(trans); + p = (struct p9_trans_fd *) client->trans; + p->conn = p9_conn_create(client); if (IS_ERR(p->conn)) { err = PTR_ERR(p->conn); p->conn = NULL; goto error; } - return trans; + return 0; error: - kfree(trans); - return ERR_PTR(err); + kfree(p); + return err; } static struct p9_trans_module p9_tcp_trans = { .name = "tcp", .maxsize = MAX_SOCK_BUF, .def = 1, - .create = p9_trans_create_tcp, + .create = p9_fd_create_tcp, + .close = p9_fd_close, + .request = p9_fd_request, + .cancel = p9_fd_cancel, .owner = THIS_MODULE, }; @@ -1588,7 +1033,10 @@ static struct p9_trans_module p9_unix_trans = { .name = "unix", .maxsize = MAX_SOCK_BUF, .def = 0, - .create = p9_trans_create_unix, + .create = p9_fd_create_unix, + .close = p9_fd_close, + .request = p9_fd_request, + .cancel = p9_fd_cancel, .owner = THIS_MODULE, }; @@ -1596,23 +1044,71 @@ static struct p9_trans_module p9_fd_trans = { .name = "fd", .maxsize = MAX_SOCK_BUF, .def = 0, - .create = p9_trans_create_fd, + .create = p9_fd_create, + .close = p9_fd_close, + .request = p9_fd_request, + .cancel = p9_fd_cancel, .owner = 
THIS_MODULE, }; -int p9_trans_fd_init(void) +/** + * p9_poll_proc - poll worker thread + * @a: thread state and arguments + * + * polls all v9fs transports for new events and queues the appropriate + * work to the work queue + * + */ + +static int p9_poll_proc(void *a) { - int i; + unsigned long flags; + + P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current); + repeat: + spin_lock_irqsave(&p9_poll_lock, flags); + while (!list_empty(&p9_poll_pending_list)) { + struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, + struct p9_conn, + poll_pending_link); + list_del_init(&conn->poll_pending_link); + spin_unlock_irqrestore(&p9_poll_lock, flags); + + p9_poll_mux(conn); + + spin_lock_irqsave(&p9_poll_lock, flags); + } + spin_unlock_irqrestore(&p9_poll_lock, flags); + + set_current_state(TASK_INTERRUPTIBLE); + if (list_empty(&p9_poll_pending_list)) { + P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n"); + schedule(); + } + __set_current_state(TASK_RUNNING); + + if (!kthread_should_stop()) + goto repeat; - for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) - p9_mux_poll_tasks[i].task = NULL; + P9_DPRINTK(P9_DEBUG_TRANS, "finish\n"); + return 0; +} +int p9_trans_fd_init(void) +{ p9_mux_wq = create_workqueue("v9fs"); if (!p9_mux_wq) { printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); return -ENOMEM; } + p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll"); + if (IS_ERR(p9_poll_task)) { + destroy_workqueue(p9_mux_wq); + printk(KERN_WARNING "v9fs: mux: creating poll task failed\n"); + return PTR_ERR(p9_poll_task); + } + v9fs_register_trans(&p9_tcp_trans); v9fs_register_trans(&p9_unix_trans); v9fs_register_trans(&p9_fd_trans); @@ -1622,6 +1118,7 @@ int p9_trans_fd_init(void) void p9_trans_fd_exit(void) { + kthread_stop(p9_poll_task); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 94912e077a5..2d7781ec663 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -1,12 +1,10 @@ /* - * The Guest 9p transport driver + * The Virtio 9p transport driver * * This is a block based transport driver based on the lguest block driver * code. * - */ -/* - * Copyright (C) 2007 Eric Van Hensbergen, IBM Corporation + * Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation * * Based on virtio console driver * Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation @@ -41,6 +39,7 @@ #include <linux/file.h> #include <net/9p/9p.h> #include <linux/parser.h> +#include <net/9p/client.h> #include <net/9p/transport.h> #include <linux/scatterlist.h> #include <linux/virtio.h> @@ -53,50 +52,6 @@ static DEFINE_MUTEX(virtio_9p_lock); /* global which tracks highest initialized channel */ static int chan_index; -#define P9_INIT_MAXTAG 16 - - -/** - * enum p9_req_status_t - virtio request status - * @REQ_STATUS_IDLE: request slot unused - * @REQ_STATUS_SENT: request sent to server - * @REQ_STATUS_RCVD: response received from server - * @REQ_STATUS_FLSH: request has been flushed - * - * The @REQ_STATUS_IDLE state is used to mark a request slot as unused - * but use is actually tracked by the idpool structure which handles tag - * id allocation. 
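p9_trans_fd_init() above now starts a single poller thread, and p9_pollwake() merely links a connection onto p9_poll_pending_list and wakes that thread, which drains the list and calls p9_poll_mux() on each entry. A loose userspace analogue of that hand-off is sketched below (pthreads instead of kernel wait queues and kthreads; every name in the sketch is invented):

/* Producer side mimics p9_pollwake(): add to a pending list and wake.
 * Consumer side mimics p9_poll_proc(): drain the list, then sleep. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct pconn {
        int id;
        struct pconn *next;
};

static pthread_mutex_t poll_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t poll_cv = PTHREAD_COND_INITIALIZER;
static struct pconn *pending;           /* models p9_poll_pending_list */
static int stop;

static void poll_wake(struct pconn *c)  /* what the wait-queue callback does */
{
        pthread_mutex_lock(&poll_lock);
        c->next = pending;
        pending = c;
        pthread_cond_signal(&poll_cv);
        pthread_mutex_unlock(&poll_lock);
}

static void *poll_proc(void *arg)       /* models the v9fs-poll thread */
{
        (void)arg;
        pthread_mutex_lock(&poll_lock);
        for (;;) {
                while (pending) {
                        struct pconn *c = pending;

                        pending = c->next;
                        pthread_mutex_unlock(&poll_lock);
                        printf("polling conn %d\n", c->id); /* p9_poll_mux() */
                        free(c);
                        pthread_mutex_lock(&poll_lock);
                }
                if (stop)
                        break;
                pthread_cond_wait(&poll_cv, &poll_lock);
        }
        pthread_mutex_unlock(&poll_lock);
        return NULL;
}

int main(void)
{
        pthread_t tid;
        struct pconn *c = malloc(sizeof(*c));

        if (!c)
                return 1;
        c->id = 1;
        pthread_create(&tid, NULL, poll_proc, NULL);
        poll_wake(c);
        pthread_mutex_lock(&poll_lock); /* ask the poller to finish and exit */
        stop = 1;
        pthread_cond_signal(&poll_cv);
        pthread_mutex_unlock(&poll_lock);
        pthread_join(tid, NULL);
        return 0;
}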
- * - */ - -enum p9_req_status_t { - REQ_STATUS_IDLE, - REQ_STATUS_SENT, - REQ_STATUS_RCVD, - REQ_STATUS_FLSH, -}; - -/** - * struct p9_req_t - virtio request slots - * @status: status of this request slot - * @wq: wait_queue for the client to block on for this request - * - * The virtio transport uses an array to track outstanding requests - * instead of a list. While this may incurr overhead during initial - * allocation or expansion, it makes request lookup much easier as the - * tag id is a index into an array. (We use tag+1 so that we can accomodate - * the -1 tag for the T_VERSION request). - * This also has the nice effect of only having to allocate wait_queues - * once, instead of constantly allocating and freeing them. Its possible - * other resources could benefit from this scheme as well. - * - */ - -struct p9_req_t { - int status; - wait_queue_head_t *wq; -}; - /** * struct virtio_chan - per-instance transport information * @initialized: whether the channel is initialized @@ -121,67 +76,14 @@ static struct virtio_chan { spinlock_t lock; + struct p9_client *client; struct virtio_device *vdev; struct virtqueue *vq; - struct p9_idpool *tagpool; - struct p9_req_t *reqs; - int max_tag; - /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; } channels[MAX_9P_CHAN]; -/** - * p9_lookup_tag - Lookup requests by tag - * @c: virtio channel to lookup tag within - * @tag: numeric id for transaction - * - * this is a simple array lookup, but will grow the - * request_slots as necessary to accomodate transaction - * ids which did not previously have a slot. - * - * Bugs: there is currently no upper limit on request slots set - * here, but that should be constrained by the id accounting. - */ - -static struct p9_req_t *p9_lookup_tag(struct virtio_chan *c, u16 tag) -{ - /* This looks up the original request by tag so we know which - * buffer to read the data into */ - tag++; - - while (tag >= c->max_tag) { - int old_max = c->max_tag; - int count; - - if (c->max_tag) - c->max_tag *= 2; - else - c->max_tag = P9_INIT_MAXTAG; - - c->reqs = krealloc(c->reqs, sizeof(struct p9_req_t)*c->max_tag, - GFP_ATOMIC); - if (!c->reqs) { - printk(KERN_ERR "Couldn't grow tag array\n"); - BUG(); - } - for (count = old_max; count < c->max_tag; count++) { - c->reqs[count].status = REQ_STATUS_IDLE; - c->reqs[count].wq = kmalloc(sizeof(wait_queue_head_t), - GFP_ATOMIC); - if (!c->reqs[count].wq) { - printk(KERN_ERR "Couldn't grow tag array\n"); - BUG(); - } - init_waitqueue_head(c->reqs[count].wq); - } - } - - return &c->reqs[tag]; -} - - /* How many bytes left in this page. 
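The p9_lookup_tag() removed above grew one flat array by doubling it with krealloc(); request lookups now go through the client-level p9_tag_lookup() instead (see req_done() further below). One way to grow such a table without ever moving slots that in-flight requests already point at is a two-level row/column layout; here is a minimal userspace sketch of that idea (ROW_SIZE, MAX_ROWS and the other names are invented for illustration and are not the client's actual constants):

#include <stdio.h>
#include <stdlib.h>

#define ROW_SIZE 256                    /* arbitrary fixed row width */
#define MAX_ROWS 256

struct slot {
        int status;
};

static struct slot *rows[MAX_ROWS];

static struct slot *tag_lookup(unsigned int tag)
{
        unsigned int row = tag / ROW_SIZE;
        unsigned int col = tag % ROW_SIZE;

        if (row >= MAX_ROWS)
                return NULL;
        if (!rows[row]) {               /* grow by whole rows only */
                rows[row] = calloc(ROW_SIZE, sizeof(struct slot));
                if (!rows[row])
                        return NULL;
        }
        return &rows[row][col];         /* earlier slots never move */
}

int main(void)
{
        struct slot *a = tag_lookup(3);
        struct slot *b = tag_lookup(ROW_SIZE + 3);  /* lands in row 1 */

        printf("slot a=%p b=%p\n", (void *)a, (void *)b);
        return 0;
}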
*/ static unsigned int rest_of_page(void *data) { @@ -197,25 +99,13 @@ static unsigned int rest_of_page(void *data) * */ -static void p9_virtio_close(struct p9_trans *trans) +static void p9_virtio_close(struct p9_client *client) { - struct virtio_chan *chan = trans->priv; - int count; - unsigned long flags; - - spin_lock_irqsave(&chan->lock, flags); - p9_idpool_destroy(chan->tagpool); - for (count = 0; count < chan->max_tag; count++) - kfree(chan->reqs[count].wq); - kfree(chan->reqs); - chan->max_tag = 0; - spin_unlock_irqrestore(&chan->lock, flags); + struct virtio_chan *chan = client->trans; mutex_lock(&virtio_9p_lock); chan->inuse = false; mutex_unlock(&virtio_9p_lock); - - kfree(trans); } /** @@ -236,17 +126,16 @@ static void req_done(struct virtqueue *vq) struct virtio_chan *chan = vq->vdev->priv; struct p9_fcall *rc; unsigned int len; - unsigned long flags; struct p9_req_t *req; - spin_lock_irqsave(&chan->lock, flags); + P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); + while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) { - req = p9_lookup_tag(chan, rc->tag); - req->status = REQ_STATUS_RCVD; - wake_up(req->wq); + P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); + P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); + req = p9_tag_lookup(chan->client, rc->tag); + p9_client_cb(chan->client, req); } - /* In case queue is stopped waiting for more buffers. */ - spin_unlock_irqrestore(&chan->lock, flags); } /** @@ -283,8 +172,14 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, return index-start; } +/* We don't currently allow canceling of virtio requests */ +static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) +{ + return 1; +} + /** - * p9_virtio_rpc - issue a request and wait for a response + * p9_virtio_request - issue a request * @t: transport state * @tc: &p9_fcall request to transmit * @rc: &p9_fcall to put reponse into @@ -292,44 +187,22 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, */ static int -p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) +p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { int in, out; - int n, err, size; - struct virtio_chan *chan = t->priv; - char *rdata; - struct p9_req_t *req; - unsigned long flags; - - if (*rc == NULL) { - *rc = kmalloc(sizeof(struct p9_fcall) + t->msize, GFP_KERNEL); - if (!*rc) - return -ENOMEM; - } - - rdata = (char *)*rc+sizeof(struct p9_fcall); - - n = P9_NOTAG; - if (tc->id != P9_TVERSION) { - n = p9_idpool_get(chan->tagpool); - if (n < 0) - return -ENOMEM; - } - - spin_lock_irqsave(&chan->lock, flags); - req = p9_lookup_tag(chan, n); - spin_unlock_irqrestore(&chan->lock, flags); + struct virtio_chan *chan = client->trans; + char *rdata = (char *)req->rc+sizeof(struct p9_fcall); - p9_set_tag(tc, n); + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio rpc tag %d\n", n); - - out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, tc->sdata, tc->size); - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, t->msize); + out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, + req->tc->size); + in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, + client->msize); req->status = REQ_STATUS_SENT; - if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, tc)) { + if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc)) { P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio rpc add_buf returned failure"); return -EIO; @@ -337,31 
+210,7 @@ p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) chan->vq->vq_ops->kick(chan->vq); - wait_event(*req->wq, req->status == REQ_STATUS_RCVD); - - size = le32_to_cpu(*(__le32 *) rdata); - - err = p9_deserialize_fcall(rdata, size, *rc, t->extended); - if (err < 0) { - P9_DPRINTK(P9_DEBUG_TRANS, - "9p debug: virtio rpc deserialize returned %d\n", err); - return err; - } - -#ifdef CONFIG_NET_9P_DEBUG - if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { - char buf[150]; - - p9_printfcall(buf, sizeof(buf), *rc, t->extended); - printk(KERN_NOTICE ">>> %p %s\n", t, buf); - } -#endif - - if (n != P9_NOTAG && p9_idpool_check(n, chan->tagpool)) - p9_idpool_put(n, chan->tagpool); - - req->status = REQ_STATUS_IDLE; - + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); return 0; } @@ -422,10 +271,9 @@ fail: /** * p9_virtio_create - allocate a new virtio channel + * @client: client instance invoking this transport * @devname: string identifying the channel to connect to (unused) * @args: args passed from sys_mount() for per-transport options (unused) - * @msize: requested maximum packet size - * @extended: 9p2000.u enabled flag * * This sets up a transport channel for 9p communication. Right now * we only match the first available channel, but eventually we couldlook up @@ -441,11 +289,9 @@ fail: * */ -static struct p9_trans * -p9_virtio_create(const char *devname, char *args, int msize, - unsigned char extended) +static int +p9_virtio_create(struct p9_client *client, const char *devname, char *args) { - struct p9_trans *trans; struct virtio_chan *chan = channels; int index = 0; @@ -463,30 +309,13 @@ p9_virtio_create(const char *devname, char *args, int msize, if (index >= MAX_9P_CHAN) { printk(KERN_ERR "9p: no channels available\n"); - return ERR_PTR(-ENODEV); + return -ENODEV; } - chan->tagpool = p9_idpool_create(); - if (IS_ERR(chan->tagpool)) { - printk(KERN_ERR "9p: couldn't allocate tagpool\n"); - return ERR_PTR(-ENOMEM); - } - p9_idpool_get(chan->tagpool); /* reserve tag 0 */ - chan->max_tag = 0; - chan->reqs = NULL; - - trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); - if (!trans) { - printk(KERN_ERR "9p: couldn't allocate transport\n"); - return ERR_PTR(-ENOMEM); - } - trans->extended = extended; - trans->msize = msize; - trans->close = p9_virtio_close; - trans->rpc = p9_virtio_rpc; - trans->priv = chan; + client->trans = (void *)chan; + chan->client = client; - return trans; + return 0; } /** @@ -526,6 +355,9 @@ static struct virtio_driver p9_virtio_drv = { static struct p9_trans_module p9_virtio_trans = { .name = "virtio", .create = p9_virtio_create, + .close = p9_virtio_close, + .request = p9_virtio_request, + .cancel = p9_virtio_cancel, .maxsize = PAGE_SIZE*16, .def = 0, .owner = THIS_MODULE, diff --git a/net/9p/util.c b/net/9p/util.c index 958fc58cd1f..dc4ec05ad93 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -105,6 +105,7 @@ retry: else if (error) return -1; + P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", i, p); return i; } EXPORT_SYMBOL(p9_idpool_get); @@ -121,6 +122,9 @@ EXPORT_SYMBOL(p9_idpool_get); void p9_idpool_put(int id, struct p9_idpool *p) { unsigned long flags; + + P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", id, p); + spin_lock_irqsave(&p->lock, flags); idr_remove(&p->pool, id); spin_unlock_irqrestore(&p->lock, flags); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f6348e078aa..8f9431a12c6 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -37,10 +37,7 
@@ #include <linux/poll.h> #include <net/sock.h> #include <asm/ioctls.h> - -#if defined(CONFIG_KMOD) #include <linux/kmod.h> -#endif #include <net/bluetooth/bluetooth.h> @@ -145,11 +142,8 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto) if (proto < 0 || proto >= BT_MAX_PROTO) return -EINVAL; -#if defined(CONFIG_KMOD) - if (!bt_proto[proto]) { + if (!bt_proto[proto]) request_module("bt-proto-%d", proto); - } -#endif err = -EPROTONOSUPPORT; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index a4abed5b4c4..fa5cda4e552 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -719,7 +719,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, return NF_ACCEPT; } *d = (struct net_device *)in; - NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in, + NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in, (struct net_device *)out, br_nf_forward_finish); return NF_STOLEN; diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 366d3e9d51f..ba6f73eb06c 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -4,6 +4,7 @@ menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" + depends on BRIDGE && BRIDGE_NETFILTER select NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5bb88eb0aad..0fa208e8640 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -305,23 +305,14 @@ find_inlist_lock_noload(struct list_head *head, const char *name, int *error, return NULL; } -#ifndef CONFIG_KMOD -#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m)) -#else static void * find_inlist_lock(struct list_head *head, const char *name, const char *prefix, int *error, struct mutex *mutex) { - void *ret; - - ret = find_inlist_lock_noload(head, name, error, mutex); - if (!ret) { - request_module("%s%s", prefix, name); - ret = find_inlist_lock_noload(head, name, error, mutex); - } - return ret; + return try_then_request_module( + find_inlist_lock_noload(head, name, error, mutex), + "%s%s", prefix, name); } -#endif static inline struct ebt_table * find_table_lock(const char *name, int *error, struct mutex *mutex) diff --git a/net/can/af_can.c b/net/can/af_can.c index 8035fbf526a..7d4d2b3c137 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -128,8 +128,8 @@ static int can_create(struct net *net, struct socket *sock, int protocol) if (net != &init_net) return -EAFNOSUPPORT; -#ifdef CONFIG_KMOD - /* try to load protocol module, when CONFIG_KMOD is defined */ +#ifdef CONFIG_MODULES + /* try to load protocol module kernel is modular */ if (!proto_tab[protocol]) { err = request_module("can-proto-%d", protocol); diff --git a/net/core/dev.c b/net/core/dev.c index 1408a083fe4..b8a4fd0806a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -924,10 +924,10 @@ int dev_change_name(struct net_device *dev, const char *newname) strlcpy(dev->name, newname, IFNAMSIZ); rollback: - err = device_rename(&dev->dev, dev->name); - if (err) { + ret = device_rename(&dev->dev, dev->name); + if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); - return err; + return ret; } write_lock_bh(&dev_base_lock); @@ -4956,8 +4956,6 @@ EXPORT_SYMBOL(br_fdb_get_hook); EXPORT_SYMBOL(br_fdb_put_hook); #endif -#ifdef CONFIG_KMOD EXPORT_SYMBOL(dev_load); -#endif EXPORT_PER_CPU_SYMBOL(softnet_data); diff --git 
a/net/core/net_namespace.c b/net/core/net_namespace.c index b0dc818a91d..f1d07b5c1e1 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -96,7 +96,7 @@ static void net_free(struct net *net) return; } #endif - + kfree(net->gen); kmem_cache_free(net_cachep, net); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3630131fa1f..31f29d2989f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1040,7 +1040,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *linkinfo[IFLA_INFO_MAX+1]; int err; -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES replay: #endif err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); @@ -1129,7 +1129,7 @@ replay: return -EOPNOTSUPP; if (!ops) { -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (kind[0]) { __rtnl_unlock(); request_module("rtnl-link-%s", kind); diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 4809753d12a..8fe931a3d7a 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -154,7 +154,7 @@ struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) struct ccid *ccid = NULL; ccids_read_lock(); -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (ccids[id] == NULL) { /* We only try to load if in process context */ ccids_read_unlock(); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 11062780bb0..d4ce1224e00 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -259,7 +259,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) fl.fl6_flowlabel = 0; fl.oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_sk(sk)->sport; + fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); opt = np->opt; @@ -558,7 +558,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_sk(sk)->sport; + fl.fl_ip_sport = inet_rsk(req)->loc_port; security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index b2804e2d1b8..e6bf99e3e41 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -309,6 +309,7 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) struct dccp_request_sock *dreq = dccp_rsk(req); inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; + inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport; inet_rsk(req)->acked = 0; req->rcv_wnd = sysctl_dccp_feat_sequence_window; dreq->dreq_timestamp_echo = 0; diff --git a/net/dccp/output.c b/net/dccp/output.c index d06945c7d3d..809d803d500 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -347,7 +347,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, /* Build and checksum header */ dh = dccp_zeroed_hdr(skb, dccp_header_size); - dh->dccph_sport = inet_sk(sk)->sport; + dh->dccph_sport = inet_rsk(req)->loc_port; dh->dccph_dport = inet_rsk(req)->rmt_port; dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 2f0ac3c3eb7..28e26bd08e2 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -152,7 +152,7 @@ static struct dn_dev_parms dn_dev_list[] = { #define DN_DEV_LIST_SIZE ARRAY_SIZE(dn_dev_list) -#define DN_DEV_PARMS_OFFSET(x) ((int) ((char *) &((struct dn_dev_parms *)0)->x)) +#define DN_DEV_PARMS_OFFSET(x) offsetof(struct dn_dev_parms, x) #ifdef CONFIG_SYSCTL @@ -166,7 +166,7 @@ 
static int max_priority[] = { 127 }; /* From DECnet spec */ static int dn_forwarding_proc(ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, +static int dn_forwarding_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); @@ -318,7 +318,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, #endif } -static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, +static int dn_forwarding_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -490,9 +490,7 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) return -EFAULT; ifr->ifr_name[IFNAMSIZ-1] = 0; -#ifdef CONFIG_KMOD dev_load(&init_net, ifr->ifr_name); -#endif switch(cmd) { case SIOCGIFADDR: diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 228067c571b..36400b26689 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str) } -static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen, +static int dn_node_address_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -217,7 +217,7 @@ static int dn_node_address_handler(ctl_table *table, int write, } -static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen, +static int dn_def_dev_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index b043eda60b0..1a9dd66511f 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -663,7 +663,7 @@ out: void arp_xmit(struct sk_buff *skb) { /* Send it off, maybe filter it using firewalling first. 
*/ - NF_HOOK(NF_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit); + NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit); } /* @@ -928,7 +928,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); + return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); freeskb: kfree_skb(skb); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b12dae2b0b2..56fce3ab6c5 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -613,9 +613,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (colon) *colon = 0; -#ifdef CONFIG_KMOD dev_load(net, ifr.ifr_name); -#endif switch (cmd) { case SIOCGIFADDR: /* Get interface address */ @@ -1283,7 +1281,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, return ret; } -static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen, +static int devinet_conf_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -1379,12 +1377,11 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, return ret; } -int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, +int ipv4_doint_and_flush_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { - int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp, - newval, newlen); + int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen); struct net *net = table->extra2; if (ret == 1) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 89cb047ab31..564230dabcb 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -53,11 +53,9 @@ static DEFINE_MUTEX(inet_diag_table_mutex); static const struct inet_diag_handler *inet_diag_lock_handler(int type) { -#ifdef CONFIG_KMOD if (!inet_diag_table[type]) request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, NETLINK_INET_DIAG, type); -#endif mutex_lock(&inet_diag_table_mutex); if (!inet_diag_table[type]) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index aa2c50a180f..fa2d6b6fc3e 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -41,12 +41,13 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, int (*okfn)(struct sk_buff *)) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) /* Previously seen (loopback)? Ignore. Do this before fragment check. */ if (skb->nfct) return NF_ACCEPT; #endif - +#endif /* Gather fragments. 
*/ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { if (nf_ct_ipv4_gather_frags(skb, diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 2ac9eaf1a8c..a65cf692359 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -584,6 +584,98 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { .flags = NF_CT_EXT_F_PREALLOC, }; +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { + [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, + [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, +}; + +static int nfnetlink_parse_nat_proto(struct nlattr *attr, + const struct nf_conn *ct, + struct nf_nat_range *range) +{ + struct nlattr *tb[CTA_PROTONAT_MAX+1]; + const struct nf_nat_protocol *npt; + int err; + + err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); + if (err < 0) + return err; + + npt = nf_nat_proto_find_get(nf_ct_protonum(ct)); + if (npt->nlattr_to_range) + err = npt->nlattr_to_range(tb, range); + nf_nat_proto_put(npt); + return err; +} + +static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { + [CTA_NAT_MINIP] = { .type = NLA_U32 }, + [CTA_NAT_MAXIP] = { .type = NLA_U32 }, +}; + +static int +nfnetlink_parse_nat(struct nlattr *nat, + const struct nf_conn *ct, struct nf_nat_range *range) +{ + struct nlattr *tb[CTA_NAT_MAX+1]; + int err; + + memset(range, 0, sizeof(*range)); + + err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); + if (err < 0) + return err; + + if (tb[CTA_NAT_MINIP]) + range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]); + + if (!tb[CTA_NAT_MAXIP]) + range->max_ip = range->min_ip; + else + range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); + + if (range->min_ip) + range->flags |= IP_NAT_RANGE_MAP_IPS; + + if (!tb[CTA_NAT_PROTO]) + return 0; + + err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); + if (err < 0) + return err; + + return 0; +} + +static int +nfnetlink_parse_nat_setup(struct nf_conn *ct, + enum nf_nat_manip_type manip, + struct nlattr *attr) +{ + struct nf_nat_range range; + + if (nfnetlink_parse_nat(attr, ct, &range) < 0) + return -EINVAL; + if (nf_nat_initialized(ct, manip)) + return -EEXIST; + + return nf_nat_setup_info(ct, &range, manip); +} +#else +static int +nfnetlink_parse_nat_setup(struct nf_conn *ct, + enum nf_nat_manip_type manip, + struct nlattr *attr) +{ + return -EOPNOTSUPP; +} +#endif + static int __net_init nf_nat_net_init(struct net *net) { net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, @@ -654,6 +746,9 @@ static int __init nf_nat_init(void) BUG_ON(nf_nat_seq_adjust_hook != NULL); rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); + BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); + rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, + nfnetlink_parse_nat_setup); return 0; cleanup_extend: @@ -667,10 +762,12 @@ static void __exit nf_nat_cleanup(void) nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); + rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); synchronize_net(); } MODULE_LICENSE("GPL"); +MODULE_ALIAS("nf-nat-ipv4"); module_init(nf_nat_init); module_exit(nf_nat_cleanup); diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index ffeaffc3fff..8303e4b406c 100644 --- 
a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -742,6 +742,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); if (*obj == NULL) { + kfree(p); kfree(id); if (net_ratelimit()) printk("OOM in bsalg (%d)\n", __LINE__); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a6d7c584f53..2ea6dcc3e2c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1109,7 +1109,12 @@ restart: printk("\n"); } #endif - rt_hash_table[hash].chain = rt; + /* + * Since lookup is lockfree, we must make sure + * previous writes to rt are comitted to memory + * before making rt visible to other CPUS. + */ + rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); *rp = rt; return 0; @@ -2908,8 +2913,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, } static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, - int __user *name, - int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, @@ -2972,16 +2975,13 @@ static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, } static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, - int __user *name, - int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { int old = ip_rt_secret_interval; - int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval, - newlen); + int ret = sysctl_jiffies(table, oldval, oldlenp, newval, newlen); rt_secret_reschedule(old); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 276d047fb85..1bb10df8ce7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -64,8 +64,8 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, } /* Validate changes from sysctl interface. 
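The net/ipv4/route.c hunk above switches the hash-chain insertion to rcu_assign_pointer() so that a lock-free reader that observes the new pointer also observes the fully initialized entry. The same publish idea expressed in plain C11 atomics, as a userspace sketch (a release store stands in for rcu_assign_pointer() and an acquire load for the reader side; none of this is the kernel RCU API):

#include <stdatomic.h>
#include <stdio.h>

struct node {
        int key;
        int val;
};

static _Atomic(struct node *) head;     /* models rt_hash_table[hash].chain */

static void publish(struct node *n)
{
        /* release: the node's fields are visible before the pointer is */
        atomic_store_explicit(&head, n, memory_order_release);
}

static void reader(void)
{
        /* acquire: pairs with the release store above */
        struct node *n = atomic_load_explicit(&head, memory_order_acquire);

        if (n)
                printf("key=%d val=%d\n", n->key, n->val);
}

int main(void)
{
        static struct node n = { .key = 1, .val = 42 };

        publish(&n);
        reader();
        return 0;
}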
*/ -static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, - int nlen, void __user *oldval, +static int ipv4_sysctl_local_port_range(ctl_table *table, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -80,7 +80,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, }; inet_get_local_port_range(range, range + 1); - ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); + ret = sysctl_intvec(&tmp, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) { if (range[1] < range[0]) ret = -EINVAL; @@ -109,8 +109,8 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * return ret; } -static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, - int nlen, void __user *oldval, +static int sysctl_tcp_congestion_control(ctl_table *table, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -122,7 +122,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int ret; tcp_get_default_congestion_control(val); - ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); + ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); if (ret == 1 && newval && newlen) ret = tcp_set_default_congestion_control(val); return ret; @@ -165,8 +165,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return ret; } -static int strategy_allowed_congestion_control(ctl_table *table, int __user *name, - int nlen, void __user *oldval, +static int strategy_allowed_congestion_control(ctl_table *table, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) @@ -179,7 +179,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam return -ENOMEM; tcp_get_available_congestion_control(tbl.data, tbl.maxlen); - ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); + ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); if (ret == 1 && newval && newlen) ret = tcp_set_allowed_congestion_control(tbl.data); kfree(tbl.data); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 6a250828b76..4ec5b4e97c4 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -115,7 +115,7 @@ int tcp_set_default_congestion_control(const char *name) spin_lock(&tcp_cong_list_lock); ca = tcp_ca_find(name); -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (!ca && capable(CAP_SYS_MODULE)) { spin_unlock(&tcp_cong_list_lock); @@ -244,7 +244,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) if (ca == icsk->icsk_ca_ops) goto out; -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES /* not found attempt to autoload module */ if (!ca && capable(CAP_SYS_MODULE)) { rcu_read_unlock(); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7b6a584b62d..eea9542728c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3982,7 +3982,6 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, } static int addrconf_sysctl_forward_strategy(ctl_table *table, - int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 840b15780a3..172438320ee 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1199,7 +1199,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) } neigh->flags |= NTF_ROUTER; } else if (rt) { - rt->rt6i_flags |= (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); + 
rt->rt6i_flags = (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); } if (rt) @@ -1730,9 +1730,8 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f return ret; } -int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, - int nlen, void __user *oldval, - size_t __user *oldlenp, +int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { struct net_device *dev = ctl->extra1; @@ -1745,13 +1744,11 @@ int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, switch (ctl->ctl_name) { case NET_NEIGH_REACHABLE_TIME: - ret = sysctl_jiffies(ctl, name, nlen, - oldval, oldlenp, newval, newlen); + ret = sysctl_jiffies(ctl, oldval, oldlenp, newval, newlen); break; case NET_NEIGH_RETRANS_TIME_MS: case NET_NEIGH_REACHABLE_TIME_MS: - ret = sysctl_ms_jiffies(ctl, name, nlen, - oldval, oldlenp, newval, newlen); + ret = sysctl_ms_jiffies(ctl, oldval, oldlenp, newval, newlen); break; default: ret = 0; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 6b29b03925f..fd5b3a4e332 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -12,6 +12,7 @@ int ip6_route_me_harder(struct sk_buff *skb) { + struct net *net = dev_net(skb->dst->dev); struct ipv6hdr *iph = ipv6_hdr(skb); struct dst_entry *dst; struct flowi fl = { @@ -23,7 +24,7 @@ int ip6_route_me_harder(struct sk_buff *skb) .saddr = iph->saddr, } }, }; - dst = ip6_route_output(dev_net(skb->dst->dev), skb->sk, &fl); + dst = ip6_route_output(net, skb->sk, &fl); #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -33,8 +34,7 @@ int ip6_route_me_harder(struct sk_buff *skb) #endif if (dst->error) { - IP6_INC_STATS(&init_net, ip6_dst_idev(dst), - IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); return -EINVAL; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ec394cf5a19..676c80b5b14 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -204,6 +204,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->mss = mss; ireq->rmt_port = th->source; + ireq->loc_port = th->dest; ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr); if (ipv6_opt_accepted(sk, skb) || diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e5310c9b84d..b6b356b7912 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -476,7 +476,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) fl.fl6_flowlabel = 0; fl.oif = treq->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_sk(sk)->sport; + fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); opt = np->opt; @@ -1309,7 +1309,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_sk(sk)->sport; + fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) @@ -1865,7 +1865,7 @@ static void get_openreq6(struct seq_file *seq, i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], - ntohs(inet_sk(sk)->sport), + ntohs(inet_rsk(req)->loc_port), dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], 
ntohs(inet_rsk(req)->rmt_port), diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 2a451562377..2ad504fc341 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -545,8 +545,12 @@ static int netdev_notify(struct notifier_block *nb, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sprintf(buf, "netdev:%s", dev->name); dir = sdata->debugfsdir; + + if (!dir) + return 0; + + sprintf(buf, "netdev:%s", dev->name); if (!debugfs_rename(dir->d_parent, dir, dir->d_parent, buf)) printk(KERN_ERR "mac80211: debugfs: failed to rename debugfs " "dir to %s\n", buf); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index b9902e425f0..189d0bafa91 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -249,11 +249,22 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DECLARE_MAC_BUF(mbuf); u8 *mac; + sta->debugfs.add_has_run = true; + if (!stations_dir) return; mac = print_mac(mbuf, sta->sta.addr); + /* + * This might fail due to a race condition: + * When mac80211 unlinks a station, the debugfs entries + * remain, but it is already possible to link a new + * station with the same address which triggers adding + * it to debugfs; therefore, if the old station isn't + * destroyed quickly enough the old station's debugfs + * dir might still be around. + */ sta->debugfs.dir = debugfs_create_dir(mac, stations_dir); if (!sta->debugfs.dir) return; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8025b294588..156e42a003a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -816,8 +816,8 @@ struct ieee802_11_elems { u8 *ext_supp_rates; u8 *wmm_info; u8 *wmm_param; - u8 *ht_cap_elem; - u8 *ht_info_elem; + struct ieee80211_ht_cap *ht_cap_elem; + struct ieee80211_ht_addt_info *ht_info_elem; u8 *mesh_config; u8 *mesh_id; u8 *peer_link; @@ -844,8 +844,6 @@ struct ieee802_11_elems { u8 ext_supp_rates_len; u8 wmm_info_len; u8 wmm_param_len; - u8 ht_cap_elem_len; - u8 ht_info_elem_len; u8 mesh_config_len; u8 mesh_id_len; u8 peer_link_len; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 49f86fa56bf..87665d7bb4f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1348,10 +1348,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { struct ieee80211_ht_bss_info bss_info; ieee80211_ht_cap_ie_to_ht_info( - (struct ieee80211_ht_cap *) elems.ht_cap_elem, &sta->sta.ht_info); ieee80211_ht_addt_info_ie_to_ht_bss_info( - (struct ieee80211_ht_addt_info *) elems.ht_info_elem, &bss_info); ieee80211_handle_ht(local, 1, &sta->sta.ht_info, &bss_info); } @@ -1709,7 +1707,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_ht_bss_info bss_info; ieee80211_ht_addt_info_ie_to_ht_bss_info( - (struct ieee80211_ht_addt_info *) elems.ht_info_elem, &bss_info); changed |= ieee80211_handle_ht(local, 1, &conf->ht_conf, &bss_info); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 8e6685e7ae8..416bb41099f 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -388,7 +388,8 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bss = ieee80211_bss_info_update(sdata->local, rx_status, mgmt, skb->len, &elems, freq, beacon); - ieee80211_rx_bss_put(sdata->local, bss); + if (bss) + ieee80211_rx_bss_put(sdata->local, bss); dev_kfree_skb(skb); return RX_QUEUED; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 
9b72d15bc8d..7fef8ea1f5e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -635,7 +635,12 @@ static void sta_info_debugfs_add_work(struct work_struct *work) spin_lock_irqsave(&local->sta_lock, flags); list_for_each_entry(tmp, &local->sta_list, list) { - if (!tmp->debugfs.dir) { + /* + * debugfs.add_has_run will be set by + * ieee80211_sta_debugfs_add regardless + * of what else it does. + */ + if (!tmp->debugfs.add_has_run) { sta = tmp; __sta_info_pin(sta); break; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index a6b51862a89..168a39a298b 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -300,6 +300,7 @@ struct sta_info { struct dentry *inactive_ms; struct dentry *last_seq_ctrl; struct dentry *agg_status; + bool add_has_run; } debugfs; #endif diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f32561ec224..cee4884b9d0 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -529,12 +529,12 @@ void ieee802_11_parse_elems(u8 *start, size_t len, elems->ext_supp_rates_len = elen; break; case WLAN_EID_HT_CAPABILITY: - elems->ht_cap_elem = pos; - elems->ht_cap_elem_len = elen; + if (elen >= sizeof(struct ieee80211_ht_cap)) + elems->ht_cap_elem = (void *)pos; break; case WLAN_EID_HT_EXTRA_INFO: - elems->ht_info_elem = pos; - elems->ht_info_elem_len = elen; + if (elen >= sizeof(struct ieee80211_ht_addt_info)) + elems->ht_info_elem = (void *)pos; break; case WLAN_EID_MESH_ID: elems->mesh_id = pos; diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 7e0d53abde2..742f811ca41 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -775,7 +775,7 @@ static int ieee80211_ioctl_siwfrag(struct net_device *dev, * configure it here */ if (local->ops->set_frag_threshold) - local->ops->set_frag_threshold( + return local->ops->set_frag_threshold( local_to_hw(local), local->fragmentation_threshold); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 78892cf2b02..25dcef9f219 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -271,7 +271,6 @@ config NF_CONNTRACK_TFTP config NF_CT_NETLINK tristate 'Connection tracking netlink interface' select NETFILTER_NETLINK - depends on NF_NAT=n || NF_NAT default m if NETFILTER_ADVANCED=n help This option enables support for a netlink-based userspace interface diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 05048e40326..79a69805221 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -25,11 +25,13 @@ menuconfig IP_VS if IP_VS config IP_VS_IPV6 - bool "IPv6 support for IPVS (DANGEROUS)" + bool "IPv6 support for IPVS" depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6) ---help--- Add IPv6 support to IPVS. This is incomplete and might be dangerous. + See http://www.mindbasket.com/ipvs for more information. + Say N if unsure. config IP_VS_DEBUG diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 03591d37b9c..b92df5c1dfc 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -115,7 +115,7 @@ int nf_conntrack_acct_init(struct net *net) if (net_eq(net, &init_net)) { #ifdef CONFIG_NF_CT_ACCT - printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n"); + printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. 
Please use\n"); printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n"); printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); #endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 27de3c7b006..622d7c671cb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -38,9 +38,16 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_acct.h> +#include <net/netfilter/nf_nat.h> #define NF_CONNTRACK_VERSION "0.5.0" +unsigned int +(*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, + enum nf_nat_manip_type manip, + struct nlattr *attr) __read_mostly; +EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); + DEFINE_SPINLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index cadfd15b44f..a040d46f85d 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -689,71 +689,6 @@ ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple, return 0; } -#ifdef CONFIG_NF_NAT_NEEDED -static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { - [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, - [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, -}; - -static int nfnetlink_parse_nat_proto(struct nlattr *attr, - const struct nf_conn *ct, - struct nf_nat_range *range) -{ - struct nlattr *tb[CTA_PROTONAT_MAX+1]; - const struct nf_nat_protocol *npt; - int err; - - err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); - if (err < 0) - return err; - - npt = nf_nat_proto_find_get(nf_ct_protonum(ct)); - if (npt->nlattr_to_range) - err = npt->nlattr_to_range(tb, range); - nf_nat_proto_put(npt); - return err; -} - -static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { - [CTA_NAT_MINIP] = { .type = NLA_U32 }, - [CTA_NAT_MAXIP] = { .type = NLA_U32 }, -}; - -static inline int -nfnetlink_parse_nat(struct nlattr *nat, - const struct nf_conn *ct, struct nf_nat_range *range) -{ - struct nlattr *tb[CTA_NAT_MAX+1]; - int err; - - memset(range, 0, sizeof(*range)); - - err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); - if (err < 0) - return err; - - if (tb[CTA_NAT_MINIP]) - range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]); - - if (!tb[CTA_NAT_MAXIP]) - range->max_ip = range->min_ip; - else - range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); - - if (range->min_ip) - range->flags |= IP_NAT_RANGE_MAP_IPS; - - if (!tb[CTA_NAT_PROTO]) - return 0; - - err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); - if (err < 0) - return err; - - return 0; -} -#endif - static inline int ctnetlink_parse_help(struct nlattr *attr, char **helper_name) { @@ -878,6 +813,36 @@ out: return err; } +#ifdef CONFIG_NF_NAT_NEEDED +static int +ctnetlink_parse_nat_setup(struct nf_conn *ct, + enum nf_nat_manip_type manip, + struct nlattr *attr) +{ + typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup; + + parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook); + if (!parse_nat_setup) { +#ifdef CONFIG_MODULES + rcu_read_unlock(); + nfnl_unlock(); + if (request_module("nf-nat-ipv4") < 0) { + nfnl_lock(); + rcu_read_lock(); + return -EOPNOTSUPP; + } + nfnl_lock(); + rcu_read_lock(); + if (nfnetlink_parse_nat_setup_hook) + return -EAGAIN; +#endif + return -EOPNOTSUPP; + } + + return parse_nat_setup(ct, manip, attr); +} 
+#endif + static int ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) { @@ -897,31 +862,6 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) /* ASSURED bit can only be set */ return -EBUSY; - if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { -#ifndef CONFIG_NF_NAT_NEEDED - return -EOPNOTSUPP; -#else - struct nf_nat_range range; - - if (cda[CTA_NAT_DST]) { - if (nfnetlink_parse_nat(cda[CTA_NAT_DST], ct, - &range) < 0) - return -EINVAL; - if (nf_nat_initialized(ct, IP_NAT_MANIP_DST)) - return -EEXIST; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); - } - if (cda[CTA_NAT_SRC]) { - if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct, - &range) < 0) - return -EINVAL; - if (nf_nat_initialized(ct, IP_NAT_MANIP_SRC)) - return -EEXIST; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); - } -#endif - } - /* Be careful here, modifying NAT bits can screw up things, * so don't let users modify them directly if they don't pass * nf_nat_range. */ @@ -929,6 +869,31 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) return 0; } +static int +ctnetlink_change_nat(struct nf_conn *ct, struct nlattr *cda[]) +{ +#ifdef CONFIG_NF_NAT_NEEDED + int ret; + + if (cda[CTA_NAT_DST]) { + ret = ctnetlink_parse_nat_setup(ct, + IP_NAT_MANIP_DST, + cda[CTA_NAT_DST]); + if (ret < 0) + return ret; + } + if (cda[CTA_NAT_SRC]) { + ret = ctnetlink_parse_nat_setup(ct, + IP_NAT_MANIP_SRC, + cda[CTA_NAT_SRC]); + if (ret < 0) + return ret; + } + return 0; +#else + return -EOPNOTSUPP; +#endif +} static inline int ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) @@ -1157,6 +1122,14 @@ ctnetlink_create_conntrack(struct nlattr *cda[], } } + if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { + err = ctnetlink_change_nat(ct, cda); + if (err < 0) { + rcu_read_unlock(); + goto err; + } + } + if (cda[CTA_PROTOINFO]) { err = ctnetlink_change_protoinfo(ct, cda); if (err < 0) { diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 373e51e91ce..1bc3001d182 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -65,7 +65,7 @@ void struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); -#ifdef DEBUG +#if defined(DEBUG) || defined(CONFIG_DYNAMIC_PRINTK_DEBUG) /* PptpControlMessageType names */ const char *const pptp_msg_name[] = { "UNKNOWN_MESSAGE", diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index b75c9c4a995..9c0ba17a1dd 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -44,15 +44,17 @@ static struct sock *nfnl = NULL; static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; static DEFINE_MUTEX(nfnl_mutex); -static inline void nfnl_lock(void) +void nfnl_lock(void) { mutex_lock(&nfnl_mutex); } +EXPORT_SYMBOL_GPL(nfnl_lock); -static inline void nfnl_unlock(void) +void nfnl_unlock(void) { mutex_unlock(&nfnl_mutex); } +EXPORT_SYMBOL_GPL(nfnl_unlock); int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) { @@ -132,9 +134,10 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return 0; type = nlh->nlmsg_type; +replay: ss = nfnetlink_get_subsys(type); if (!ss) { -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES nfnl_unlock(); request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); nfnl_lock(); @@ -165,7 +168,10 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } else return -EINVAL; - return nc->call(nfnl, skb, nlh, cda); + err = nc->call(nfnl, skb, nlh, cda); + if (err == 
-EAGAIN) + goto replay; + return err; } } diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 2cc1fff4930..f9977b3311f 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -48,7 +48,7 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { }, { .name = "NFQUEUE", - .family = NF_ARP, + .family = NFPROTO_ARP, .target = nfqueue_tg, .targetsize = sizeof(struct xt_NFQ_info), .me = THIS_MODULE, diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index 6f62c36948d..7ac54eab0b0 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -61,7 +61,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par) if (info->flags & IPRANGE_SRC) { m = ntohl(iph->saddr) < ntohl(info->src_min.ip); m |= ntohl(iph->saddr) > ntohl(info->src_max.ip); - m ^= info->flags & IPRANGE_SRC_INV; + m ^= !!(info->flags & IPRANGE_SRC_INV); if (m) { pr_debug("src IP " NIPQUAD_FMT " NOT in range %s" NIPQUAD_FMT "-" NIPQUAD_FMT "\n", @@ -75,7 +75,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par) if (info->flags & IPRANGE_DST) { m = ntohl(iph->daddr) < ntohl(info->dst_min.ip); m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip); - m ^= info->flags & IPRANGE_DST_INV; + m ^= !!(info->flags & IPRANGE_DST_INV); if (m) { pr_debug("dst IP " NIPQUAD_FMT " NOT in range %s" NIPQUAD_FMT "-" NIPQUAD_FMT "\n", @@ -114,14 +114,14 @@ iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par) if (info->flags & IPRANGE_SRC) { m = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0; m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0; - m ^= info->flags & IPRANGE_SRC_INV; + m ^= !!(info->flags & IPRANGE_SRC_INV); if (m) return false; } if (info->flags & IPRANGE_DST) { m = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0; m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0; - m ^= info->flags & IPRANGE_DST_INV; + m ^= !!(info->flags & IPRANGE_DST_INV); if (m) return false; } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 4ebd4ca9a99..280c471bcdf 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -318,15 +318,15 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS - t->proc = proc_create(t->name, ip_list_perms, recent_proc_dir, - &recent_mt_fops); + t->proc = proc_create_data(t->name, ip_list_perms, recent_proc_dir, + &recent_mt_fops, t); if (t->proc == NULL) { kfree(t); goto out; } #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - t->proc_old = proc_create(t->name, ip_list_perms, proc_old_dir, - &recent_old_fops); + t->proc_old = proc_create_data(t->name, ip_list_perms, proc_old_dir, + &recent_old_fops, t); if (t->proc_old == NULL) { remove_proc_entry(t->name, proc_old_dir); kfree(t); @@ -334,11 +334,9 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) } t->proc_old->uid = ip_list_uid; t->proc_old->gid = ip_list_gid; - t->proc_old->data = t; #endif t->proc->uid = ip_list_uid; t->proc->gid = ip_list_gid; - t->proc->data = t; #endif spin_lock_bh(&recent_lock); list_add_tail(&t->list, &tables); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2fd8afac5f7..480184a857d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -435,7 +435,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol) return -EPROTONOSUPPORT; netlink_lock_table(); -#ifdef CONFIG_KMOD 
+#ifdef CONFIG_MODULES if (!nl_table[protocol].registered) { netlink_unlock_table(); request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 9e9c6fce11a..b9d97effebe 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -67,11 +67,10 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol) } pnp = phonet_proto_get(protocol); -#ifdef CONFIG_KMOD if (pnp == NULL && request_module("net-pf-%d-proto-%d", PF_PHONET, protocol) == 0) pnp = phonet_proto_get(protocol); -#endif + if (pnp == NULL) return -EPROTONOSUPPORT; if (sock->type != pnp->sock_type) { diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9974b3f04f0..8f457f1e0ac 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -494,7 +494,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, a_o = tc_lookup_action_n(act_name); if (a_o == NULL) { -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES rtnl_unlock(); request_module("act_%s", act_name); rtnl_lock(); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8eb79e92e94..16e7ac9774e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -227,7 +227,7 @@ replay: err = -ENOENT; tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]); if (tp_ops == NULL) { -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES struct nlattr *kind = tca[TCA_KIND]; char name[IFNAMSIZ]; diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 5e6f82e0e6f..e82519e548d 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -224,7 +224,7 @@ static int tcf_em_validate(struct tcf_proto *tp, if (em->ops == NULL) { err = -ENOENT; -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES __rtnl_unlock(); request_module("ematch-kind-%u", em_hdr->kind); rtnl_lock(); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1122c952aa9..b16ad2972c6 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -764,7 +764,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (ops == NULL && kind != NULL) { char name[IFNAMSIZ]; if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 7b5572d6beb..93cd30ce650 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -326,6 +326,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = { static struct netdev_queue noop_netdev_queue = { .qdisc = &noop_qdisc, + .qdisc_sleeping = &noop_qdisc, }; struct Qdisc noop_qdisc = { @@ -352,6 +353,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { static struct Qdisc noqueue_qdisc; static struct netdev_queue noqueue_netdev_queue = { .qdisc = &noqueue_qdisc, + .qdisc_sleeping = &noqueue_qdisc, }; static struct Qdisc noqueue_qdisc = { diff --git a/net/socket.c b/net/socket.c index 3e8d4e35c08..2b7a4b5c9b7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1142,7 +1142,7 @@ static int __sock_create(struct net *net, int family, int type, int protocol, sock->type = type; -#if defined(CONFIG_KMOD) +#ifdef CONFIG_MODULES /* Attempt to load a protocol module if the find failed. * * 12/09/1996 Marcin: But! 
this makes REALLY only sense, if the user diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 6bfea9ed686..436bf1b4b76 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -83,10 +83,8 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) if (flavor >= RPC_AUTH_MAXFLAVOR) goto out; -#ifdef CONFIG_KMOD if ((ops = auth_flavors[flavor]) == NULL) request_module("rpc-auth-%u", flavor); -#endif spin_lock(&rpc_authflavor_lock); ops = auth_flavors[flavor]; if (ops == NULL || !try_module_get(ops->owner)) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index da0789fa1b8..4895c341e46 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -213,10 +213,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru } /* save the nodename */ - clnt->cl_nodelen = strlen(utsname()->nodename); + clnt->cl_nodelen = strlen(init_utsname()->nodename); if (clnt->cl_nodelen > UNX_MAXNODENAME) clnt->cl_nodelen = UNX_MAXNODENAME; - memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen); + memcpy(clnt->cl_nodename, init_utsname()->nodename, clnt->cl_nodelen); rpc_register_client(clnt); return clnt; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 34abc91058d..41013dd66ac 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -460,6 +460,28 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi return rpc_run_task(&task_setup_data); } +/* + * In the case where rpc clients have been cloned, we want to make + * sure that we use the program number/version etc of the actual + * owner of the xprt. To do so, we walk back up the tree of parents + * to find whoever created the transport and/or whoever has the + * autobind flag set. + */ +static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt) +{ + struct rpc_clnt *parent = clnt->cl_parent; + + while (parent != clnt) { + if (parent->cl_xprt != clnt->cl_xprt) + break; + if (clnt->cl_autobind) + break; + clnt = parent; + parent = parent->cl_parent; + } + return clnt; +} + /** * rpcb_getport_async - obtain the port for a given RPC service on a given host * @task: task that is waiting for portmapper request @@ -469,10 +491,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi */ void rpcb_getport_async(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; + struct rpc_clnt *clnt; struct rpc_procinfo *proc; u32 bind_version; - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt; struct rpc_clnt *rpcb_clnt; static struct rpcbind_args *map; struct rpc_task *child; @@ -481,13 +503,13 @@ void rpcb_getport_async(struct rpc_task *task) size_t salen; int status; + clnt = rpcb_find_transport_owner(task->tk_client); + xprt = clnt->cl_xprt; + dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __func__, clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); - /* Autobind on cloned rpc clients is discouraged */ - BUG_ON(clnt->cl_parent != clnt); - /* Put self on the wait queue to ensure we get notified if * some other task is already attempting to bind the port */ rpc_sleep_on(&xprt->binding, task, NULL); @@ -549,7 +571,7 @@ void rpcb_getport_async(struct rpc_task *task) status = -ENOMEM; dprintk("RPC: %5u %s: no memory available\n", task->tk_pid, __func__); - goto bailout_nofree; + goto bailout_release_client; } map->r_prog = clnt->cl_prog; map->r_vers = clnt->cl_vers; @@ -569,11 +591,13 @@ void rpcb_getport_async(struct rpc_task *task) task->tk_pid, __func__); return; } - 
rpc_put_task(child); - task->tk_xprt->stat.bind_count++; + xprt->stat.bind_count++; + rpc_put_task(child); return; +bailout_release_client: + rpc_release_client(rpcb_clnt); bailout_nofree: rpcb_wake_rpcbind_waiters(xprt, status); task->tk_status = status; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 99a52aabe33..29e401bb612 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -108,13 +108,10 @@ int xprt_register_transport(struct xprt_class *transport) goto out; } - result = -EINVAL; - if (try_module_get(THIS_MODULE)) { - list_add_tail(&transport->list, &xprt_list); - printk(KERN_INFO "RPC: Registered %s transport module.\n", - transport->name); - result = 0; - } + list_add_tail(&transport->list, &xprt_list); + printk(KERN_INFO "RPC: Registered %s transport module.\n", + transport->name); + result = 0; out: spin_unlock(&xprt_list_lock); @@ -143,7 +140,6 @@ int xprt_unregister_transport(struct xprt_class *transport) "RPC: Unregistered %s transport module.\n", transport->name); list_del_init(&transport->list); - module_put(THIS_MODULE); goto out; } } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 5c1954d28d0..14106d26bb9 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, } if (xdrbuf->tail[0].iov_len) { + /* the rpcrdma protocol allows us to omit any trailing + * xdr pad bytes, saving the server an RDMA operation. */ + if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) + return n; if (n == nsegs) return 0; seg[n].mr_page = NULL; @@ -508,8 +512,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) if (hdrlen == 0) return -1; - dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n" - " headerp 0x%p base 0x%p lkey 0x%x\n", + dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" + " headerp 0x%p base 0x%p lkey 0x%x\n", __func__, transfertypes[wtype], hdrlen, rpclen, padlen, headerp, base, req->rl_iov.lkey); @@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b * Scatter inline received data back into provided iov's. 
*/ static void -rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) +rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) { int i, npages, curlen, olen; char *destp; @@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) } else rqst->rq_rcv_buf.tail[0].iov_len = 0; + if (pad) { + /* implicit padding on terminal chunk */ + unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base; + while (pad--) + p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0; + } + if (copy_len) dprintk("RPC: %s: %d bytes in" " %d extra segments (%d lost)\n", @@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) struct rpc_xprt *xprt = ep->rep_xprt; spin_lock_bh(&xprt->transport_lock); + if (++xprt->connect_cookie == 0) /* maintain a reserved value */ + ++xprt->connect_cookie; if (ep->rep_connected > 0) { if (!xprt_test_and_set_connected(xprt)) xprt_wake_pending_tasks(xprt, 0); } else { if (xprt_test_and_clear_connected(xprt)) - xprt_wake_pending_tasks(xprt, ep->rep_connected); + xprt_wake_pending_tasks(xprt, -ENOTCONN); } spin_unlock_bh(&xprt->transport_lock); } @@ -792,14 +805,20 @@ repost: ((unsigned char *)iptr - (unsigned char *)headerp); status = rep->rr_len + rdmalen; r_xprt->rx_stats.total_rdma_reply += rdmalen; + /* special case - last chunk may omit padding */ + if (rdmalen &= 3) { + rdmalen = 4 - rdmalen; + status += rdmalen; + } } else { /* else ordinary inline */ + rdmalen = 0; iptr = (__be32 *)((unsigned char *)headerp + 28); rep->rr_len -= 28; /*sizeof *headerp;*/ status = rep->rr_len; } /* Fix up the rpc results for upper layer */ - rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); + rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen); break; case htonl(RDMA_NOMSG): diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index a564c1a39ec..9839c3d9414 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_inline_write_padding; -#if !RPCRDMA_PERSISTENT_REGISTRATION -static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? 
*/ -#else -static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL; -#endif +static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; + int xprt_rdma_pad_optimize = 0; #ifdef RPC_DEBUG @@ -140,6 +137,14 @@ static ctl_table xr_tunables_table[] = { .extra2 = &max_memreg, }, { + .ctl_name = CTL_UNNUMBERED, + .procname = "rdma_pad_optimize", + .data = &xprt_rdma_pad_optimize, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0, }, }; @@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); dprintk("RPC: %s: closing\n", __func__); + if (r_xprt->rx_ep.rep_connected > 0) + xprt->reestablish_timeout = 0; xprt_disconnect_done(xprt); (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); } @@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task) /* Reconnect */ schedule_delayed_work(&r_xprt->rdma_connect, xprt->reestablish_timeout); + xprt->reestablish_timeout <<= 1; + if (xprt->reestablish_timeout > (30 * HZ)) + xprt->reestablish_timeout = (30 * HZ); + else if (xprt->reestablish_timeout < (5 * HZ)) + xprt->reestablish_timeout = (5 * HZ); } else { schedule_delayed_work(&r_xprt->rdma_connect, 0); if (!RPC_IS_ASYNC(task)) @@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) } dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); out: + req->rl_connect_cookie = 0; /* our reserved value */ return req->rl_xdr_buf; outfail: @@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task) req->rl_reply->rr_xprt = xprt; } - if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { - xprt_disconnect_done(xprt); - return -ENOTCONN; /* implies disconnect */ - } + /* Must suppress retransmit to maintain credits */ + if (req->rl_connect_cookie == xprt->connect_cookie) + goto drop_connection; + req->rl_connect_cookie = xprt->connect_cookie; + + if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) + goto drop_connection; + task->tk_bytes_sent += rqst->rq_snd_buf.len; rqst->rq_bytes_sent = 0; return 0; + +drop_connection: + xprt_disconnect_done(xprt); + return -ENOTCONN; /* implies disconnect */ } static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) @@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void) { int rc; - dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); + dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n"); #ifdef RPC_DEBUG if (sunrpc_table_header) { unregister_sysctl_table(sunrpc_table_header); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8ea283ecc52..a5fef5e6c32 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: case RDMA_CM_EVENT_ROUTE_RESOLVED: + ia->ri_async_rc = 0; complete(&ia->ri_done); break; case RDMA_CM_EVENT_ADDR_ERROR: @@ -338,13 +339,32 @@ connected: wake_up_all(&ep->rep_connect_wait); break; default: - ia->ri_async_rc = -EINVAL; - dprintk("RPC: %s: unexpected CM event %X\n", + dprintk("RPC: %s: unexpected CM event %d\n", __func__, event->event); - complete(&ia->ri_done); break; } +#ifdef RPC_DEBUG + if (connstate == 1) { + int ird = attr.max_dest_rd_atomic; + int tird = ep->rep_remote_cma.responder_resources; + printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " + "on %s, memreg %d slots %d ird %d%s\n", + 
NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), + ia->ri_id->device->name, + ia->ri_memreg_strategy, + xprt->rx_buf.rb_max_requests, + ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); + } else if (connstate < 0) { + printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " + "closed (%d)\n", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), + connstate); + } +#endif + return 0; } @@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rdma_cm_id *id; int rc; + init_completion(&ia->ri_done); + id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); if (IS_ERR(id)) { rc = PTR_ERR(id); @@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, return id; } - ia->ri_async_rc = 0; + ia->ri_async_rc = -ETIMEDOUT; rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); if (rc) { dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", __func__, rc); goto out; } - wait_for_completion(&ia->ri_done); + wait_for_completion_interruptible_timeout(&ia->ri_done, + msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); rc = ia->ri_async_rc; if (rc) goto out; - ia->ri_async_rc = 0; + ia->ri_async_rc = -ETIMEDOUT; rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); if (rc) { dprintk("RPC: %s: rdma_resolve_route() failed %i\n", __func__, rc); goto out; } - wait_for_completion(&ia->ri_done); + wait_for_completion_interruptible_timeout(&ia->ri_done, + msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); rc = ia->ri_async_rc; if (rc) goto out; @@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq) int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) { - int rc; + int rc, mem_priv; + struct ib_device_attr devattr; struct rpcrdma_ia *ia = &xprt->rx_ia; - init_completion(&ia->ri_done); - ia->ri_id = rpcrdma_create_id(xprt, ia, addr); if (IS_ERR(ia->ri_id)) { rc = PTR_ERR(ia->ri_id); @@ -443,6 +466,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) } /* + * Query the device to determine if the requested memory + * registration strategy is supported. If it isn't, set the + * strategy to a globally supported model. 
+ */ + rc = ib_query_device(ia->ri_id->device, &devattr); + if (rc) { + dprintk("RPC: %s: ib_query_device failed %d\n", + __func__, rc); + goto out2; + } + + if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { + ia->ri_have_dma_lkey = 1; + ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; + } + + switch (memreg) { + case RPCRDMA_MEMWINDOWS: + case RPCRDMA_MEMWINDOWS_ASYNC: + if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) { + dprintk("RPC: %s: MEMWINDOWS registration " + "specified but not supported by adapter, " + "using slower RPCRDMA_REGISTER\n", + __func__); + memreg = RPCRDMA_REGISTER; + } + break; + case RPCRDMA_MTHCAFMR: + if (!ia->ri_id->device->alloc_fmr) { +#if RPCRDMA_PERSISTENT_REGISTRATION + dprintk("RPC: %s: MTHCAFMR registration " + "specified but not supported by adapter, " + "using riskier RPCRDMA_ALLPHYSICAL\n", + __func__); + memreg = RPCRDMA_ALLPHYSICAL; +#else + dprintk("RPC: %s: MTHCAFMR registration " + "specified but not supported by adapter, " + "using slower RPCRDMA_REGISTER\n", + __func__); + memreg = RPCRDMA_REGISTER; +#endif + } + break; + case RPCRDMA_FRMR: + /* Requires both frmr reg and local dma lkey */ + if ((devattr.device_cap_flags & + (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != + (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { +#if RPCRDMA_PERSISTENT_REGISTRATION + dprintk("RPC: %s: FRMR registration " + "specified but not supported by adapter, " + "using riskier RPCRDMA_ALLPHYSICAL\n", + __func__); + memreg = RPCRDMA_ALLPHYSICAL; +#else + dprintk("RPC: %s: FRMR registration " + "specified but not supported by adapter, " + "using slower RPCRDMA_REGISTER\n", + __func__); + memreg = RPCRDMA_REGISTER; +#endif + } + break; + } + + /* * Optionally obtain an underlying physical identity mapping in * order to do a memory window-based bind. This base registration * is protected from remote access - that is enabled only by binding @@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) * revoked after the corresponding completion similar to a storage * adapter. 
*/ - if (memreg > RPCRDMA_REGISTER) { - int mem_priv = IB_ACCESS_LOCAL_WRITE; - switch (memreg) { + switch (memreg) { + case RPCRDMA_BOUNCEBUFFERS: + case RPCRDMA_REGISTER: + case RPCRDMA_FRMR: + break; #if RPCRDMA_PERSISTENT_REGISTRATION - case RPCRDMA_ALLPHYSICAL: - mem_priv |= IB_ACCESS_REMOTE_WRITE; - mem_priv |= IB_ACCESS_REMOTE_READ; - break; + case RPCRDMA_ALLPHYSICAL: + mem_priv = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ; + goto register_setup; #endif - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - mem_priv |= IB_ACCESS_MW_BIND; - break; - default: + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + mem_priv = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_MW_BIND; + goto register_setup; + case RPCRDMA_MTHCAFMR: + if (ia->ri_have_dma_lkey) break; - } + mem_priv = IB_ACCESS_LOCAL_WRITE; + register_setup: ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); if (IS_ERR(ia->ri_bind_mem)) { printk(KERN_ALERT "%s: ib_get_dma_mr for " @@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) memreg = RPCRDMA_REGISTER; ia->ri_bind_mem = NULL; } + break; + default: + printk(KERN_ERR "%s: invalid memory registration mode %d\n", + __func__, memreg); + rc = -EINVAL; + goto out2; } + dprintk("RPC: %s: memory registration strategy is %d\n", + __func__, memreg); /* Else will do memory reg/dereg for each chunk */ ia->ri_memreg_strategy = memreg; @@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) return 0; out2: rdma_destroy_id(ia->ri_id); + ia->ri_id = NULL; out1: return rc; } @@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) dprintk("RPC: %s: ib_dereg_mr returned %i\n", __func__, rc); } - if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) - rdma_destroy_qp(ia->ri_id); + if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { + if (ia->ri_id->qp) + rdma_destroy_qp(ia->ri_id); + rdma_destroy_id(ia->ri_id); + ia->ri_id = NULL; + } if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { rc = ib_dealloc_pd(ia->ri_pd); dprintk("RPC: %s: ib_dealloc_pd returned %i\n", __func__, rc); } - if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) - rdma_destroy_id(ia->ri_id); } /* @@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.srq = NULL; ep->rep_attr.cap.max_send_wr = cdata->max_requests; switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: + /* Add room for frmr register and invalidate WRs */ + ep->rep_attr.cap.max_send_wr *= 3; + if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) + return -EINVAL; + break; case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: /* Add room for mw_binds+unbinds - overkill! 
*/ @@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_remote_cma.private_data_len = 0; /* Client offers RDMA Read but does not initiate */ - switch (ia->ri_memreg_strategy) { - case RPCRDMA_BOUNCEBUFFERS: + ep->rep_remote_cma.initiator_depth = 0; + if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS) ep->rep_remote_cma.responder_resources = 0; - break; - case RPCRDMA_MTHCAFMR: - case RPCRDMA_REGISTER: - ep->rep_remote_cma.responder_resources = cdata->max_requests * - (RPCRDMA_MAX_DATA_SEGS / 8); - break; - case RPCRDMA_MEMWINDOWS: - case RPCRDMA_MEMWINDOWS_ASYNC: -#if RPCRDMA_PERSISTENT_REGISTRATION - case RPCRDMA_ALLPHYSICAL: -#endif - ep->rep_remote_cma.responder_resources = cdata->max_requests * - (RPCRDMA_MAX_DATA_SEGS / 2); - break; - default: - break; - } - if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom) + else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ + ep->rep_remote_cma.responder_resources = 32; + else ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; - ep->rep_remote_cma.initiator_depth = 0; ep->rep_remote_cma.retry_count = 7; ep->rep_remote_cma.flow_control = 0; @@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) if (rc) dprintk("RPC: %s: rpcrdma_ep_disconnect" " returned %i\n", __func__, rc); + rdma_destroy_qp(ia->ri_id); + ia->ri_id->qp = NULL; } - ep->rep_func = NULL; - /* padding - could be done in rpcrdma_buffer_destroy... */ if (ep->rep_pad_mr) { rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); ep->rep_pad_mr = NULL; } - if (ia->ri_id->qp) { - rdma_destroy_qp(ia->ri_id); - ia->ri_id->qp = NULL; - } - rpcrdma_clean_cq(ep->rep_cq); rc = ib_destroy_cq(ep->rep_cq); if (rc) @@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) struct rdma_cm_id *id; int rc = 0; int retry_count = 0; - int reconnect = (ep->rep_connected != 0); - if (reconnect) { + if (ep->rep_connected != 0) { struct rpcrdma_xprt *xprt; retry: rc = rpcrdma_ep_disconnect(ep, ia); @@ -745,6 +836,7 @@ retry: goto out; } /* END TEMP */ + rdma_destroy_qp(ia->ri_id); rdma_destroy_id(ia->ri_id); ia->ri_id = id; } @@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { } } - /* Theoretically a client initiator_depth > 0 is not needed, - * but many peers fail to complete the connection unless they - * == responder_resources! */ - if (ep->rep_remote_cma.initiator_depth != - ep->rep_remote_cma.responder_resources) - ep->rep_remote_cma.initiator_depth = - ep->rep_remote_cma.responder_resources; - ep->rep_connected = 0; rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); @@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { goto out; } - if (reconnect) - return 0; - wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); /* @@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { if (ep->rep_connected <= 0) { /* Sometimes, the only way to reliably connect to remote * CMs is to use same nonzero values for ORD and IRD. 
*/ - ep->rep_remote_cma.initiator_depth = - ep->rep_remote_cma.responder_resources; - if (ep->rep_remote_cma.initiator_depth == 0) - ++ep->rep_remote_cma.initiator_depth; - if (ep->rep_remote_cma.responder_resources == 0) - ++ep->rep_remote_cma.responder_resources; - if (retry_count++ == 0) + if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 && + (ep->rep_remote_cma.responder_resources == 0 || + ep->rep_remote_cma.initiator_depth != + ep->rep_remote_cma.responder_resources)) { + if (ep->rep_remote_cma.responder_resources == 0) + ep->rep_remote_cma.responder_resources = 1; + ep->rep_remote_cma.initiator_depth = + ep->rep_remote_cma.responder_resources; goto retry; + } rc = ep->rep_connected; } else { dprintk("RPC: %s: connected\n", __func__); @@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, char *p; size_t len; int i, rc; + struct rpcrdma_mw *r; buf->rb_max_requests = cdata->max_requests; spin_lock_init(&buf->rb_lock); @@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, * 2. arrays of struct rpcrdma_req to fill in pointers * 3. array of struct rpcrdma_rep for replies * 4. padding, if any - * 5. mw's, if any + * 5. mw's, fmr's or frmr's, if any * Send/recv buffers in req/rep need to be registered */ @@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); len += cdata->padding; switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: + len += buf->rb_max_requests * RPCRDMA_MAX_SEGS * + sizeof(struct rpcrdma_mw); + break; case RPCRDMA_MTHCAFMR: /* TBD we are perhaps overallocating here */ len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * @@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, * and also reduce unbind-to-bind collision. 
*/ INIT_LIST_HEAD(&buf->rb_mws); + r = (struct rpcrdma_mw *)p; switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: + for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { + r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, + RPCRDMA_MAX_SEGS); + if (IS_ERR(r->r.frmr.fr_mr)) { + rc = PTR_ERR(r->r.frmr.fr_mr); + dprintk("RPC: %s: ib_alloc_fast_reg_mr" + " failed %i\n", __func__, rc); + goto out; + } + r->r.frmr.fr_pgl = + ib_alloc_fast_reg_page_list(ia->ri_id->device, + RPCRDMA_MAX_SEGS); + if (IS_ERR(r->r.frmr.fr_pgl)) { + rc = PTR_ERR(r->r.frmr.fr_pgl); + dprintk("RPC: %s: " + "ib_alloc_fast_reg_page_list " + "failed %i\n", __func__, rc); + goto out; + } + list_add(&r->mw_list, &buf->rb_mws); + ++r; + } + break; case RPCRDMA_MTHCAFMR: - { - struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; - struct ib_fmr_attr fa = { - RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT - }; /* TBD we are perhaps overallocating here */ for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { + static struct ib_fmr_attr fa = + { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT }; r->r.fmr = ib_alloc_fmr(ia->ri_pd, IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, &fa); @@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, list_add(&r->mw_list, &buf->rb_mws); ++r; } - } break; case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: - { - struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; /* Allocate one extra request's worth, for full cycling */ for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { r->r.mw = ib_alloc_mw(ia->ri_pd); @@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, list_add(&r->mw_list, &buf->rb_mws); ++r; } - } break; default: break; @@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { int rc, i; struct rpcrdma_ia *ia = rdmab_to_ia(buf); + struct rpcrdma_mw *r; /* clean up in reverse order from create * 1. 
recv mr memory (mr free, then kfree) @@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) } if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { while (!list_empty(&buf->rb_mws)) { - struct rpcrdma_mw *r; r = list_entry(buf->rb_mws.next, struct rpcrdma_mw, mw_list); list_del(&r->mw_list); switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: + rc = ib_dereg_mr(r->r.frmr.fr_mr); + if (rc) + dprintk("RPC: %s:" + " ib_dereg_mr" + " failed %i\n", + __func__, rc); + ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); + break; case RPCRDMA_MTHCAFMR: rc = ib_dealloc_fmr(r->r.fmr); if (rc) @@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) { struct rpcrdma_req *req; unsigned long flags; + int i; + struct rpcrdma_mw *r; spin_lock_irqsave(&buffers->rb_lock, flags); if (buffers->rb_send_index == buffers->rb_max_requests) { @@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) } buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; if (!list_empty(&buffers->rb_mws)) { - int i = RPCRDMA_MAX_SEGS - 1; + i = RPCRDMA_MAX_SEGS - 1; do { - struct rpcrdma_mw *r; r = list_entry(buffers->rb_mws.next, struct rpcrdma_mw, mw_list); list_del(&r->mw_list); @@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) req->rl_reply = NULL; } switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: case RPCRDMA_MTHCAFMR: case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: @@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, va, len, DMA_BIDIRECTIONAL); iov->length = len; - if (ia->ri_bind_mem != NULL) { + if (ia->ri_have_dma_lkey) { + *mrp = NULL; + iov->lkey = ia->ri_dma_lkey; + return 0; + } else if (ia->ri_bind_mem != NULL) { *mrp = NULL; iov->lkey = ia->ri_bind_mem->lkey; return 0; @@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) seg->mr_dma, seg->mr_dmalen, seg->mr_dir); } +static int +rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, + int *nsegs, int writing, struct rpcrdma_ia *ia, + struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_mr_seg *seg1 = seg; + struct ib_send_wr frmr_wr, *bad_wr; + u8 key; + int len, pageoff; + int i, rc; + + pageoff = offset_in_page(seg1->mr_offset); + seg1->mr_offset -= pageoff; /* start of page */ + seg1->mr_len += pageoff; + len = -pageoff; + if (*nsegs > RPCRDMA_MAX_DATA_SEGS) + *nsegs = RPCRDMA_MAX_DATA_SEGS; + for (i = 0; i < *nsegs;) { + rpcrdma_map_one(ia, seg, writing); + seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; + len += seg->mr_len; + ++seg; + ++i; + /* Check for holes */ + if ((i < *nsegs && offset_in_page(seg->mr_offset)) || + offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) + break; + } + dprintk("RPC: %s: Using frmr %p to map %d segments\n", + __func__, seg1->mr_chunk.rl_mw, i); + + /* Bump the key */ + key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); + ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); + + /* Prepare FRMR WR */ + memset(&frmr_wr, 0, sizeof frmr_wr); + frmr_wr.opcode = IB_WR_FAST_REG_MR; + frmr_wr.send_flags = 0; /* unsignaled */ + frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma; + frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; + frmr_wr.wr.fast_reg.page_list_len = i; + frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; + frmr_wr.wr.fast_reg.access_flags = (writing ? 
+ IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); + frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + DECR_CQCOUNT(&r_xprt->rx_ep); + + rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); + + if (rc) { + dprintk("RPC: %s: failed ib_post_send for register," + " status %i\n", __func__, rc); + while (i--) + rpcrdma_unmap_one(ia, --seg); + } else { + seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + seg1->mr_base = seg1->mr_dma + pageoff; + seg1->mr_nsegs = i; + seg1->mr_len = len; + } + *nsegs = i; + return rc; +} + +static int +rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, + struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_mr_seg *seg1 = seg; + struct ib_send_wr invalidate_wr, *bad_wr; + int rc; + + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); + + memset(&invalidate_wr, 0, sizeof invalidate_wr); + invalidate_wr.opcode = IB_WR_LOCAL_INV; + invalidate_wr.send_flags = 0; /* unsignaled */ + invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + DECR_CQCOUNT(&r_xprt->rx_ep); + + rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); + if (rc) + dprintk("RPC: %s: failed ib_post_send for invalidate," + " status %i\n", __func__, rc); + return rc; +} + +static int +rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, + int *nsegs, int writing, struct rpcrdma_ia *ia) +{ + struct rpcrdma_mr_seg *seg1 = seg; + u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; + int len, pageoff, i, rc; + + pageoff = offset_in_page(seg1->mr_offset); + seg1->mr_offset -= pageoff; /* start of page */ + seg1->mr_len += pageoff; + len = -pageoff; + if (*nsegs > RPCRDMA_MAX_DATA_SEGS) + *nsegs = RPCRDMA_MAX_DATA_SEGS; + for (i = 0; i < *nsegs;) { + rpcrdma_map_one(ia, seg, writing); + physaddrs[i] = seg->mr_dma; + len += seg->mr_len; + ++seg; + ++i; + /* Check for holes */ + if ((i < *nsegs && offset_in_page(seg->mr_offset)) || + offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) + break; + } + rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, + physaddrs, i, seg1->mr_dma); + if (rc) { + dprintk("RPC: %s: failed ib_map_phys_fmr " + "%u@0x%llx+%i (%d)... status %i\n", __func__, + len, (unsigned long long)seg1->mr_dma, + pageoff, i, rc); + while (i--) + rpcrdma_unmap_one(ia, --seg); + } else { + seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; + seg1->mr_base = seg1->mr_dma + pageoff; + seg1->mr_nsegs = i; + seg1->mr_len = len; + } + *nsegs = i; + return rc; +} + +static int +rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, + struct rpcrdma_ia *ia) +{ + struct rpcrdma_mr_seg *seg1 = seg; + LIST_HEAD(l); + int rc; + + list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l); + rc = ib_unmap_fmr(&l); + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); + if (rc) + dprintk("RPC: %s: failed ib_unmap_fmr," + " status %i\n", __func__, rc); + return rc; +} + +static int +rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, + int *nsegs, int writing, struct rpcrdma_ia *ia, + struct rpcrdma_xprt *r_xprt) +{ + int mem_priv = (writing ? 
IB_ACCESS_REMOTE_WRITE : + IB_ACCESS_REMOTE_READ); + struct ib_mw_bind param; + int rc; + + *nsegs = 1; + rpcrdma_map_one(ia, seg, writing); + param.mr = ia->ri_bind_mem; + param.wr_id = 0ULL; /* no send cookie */ + param.addr = seg->mr_dma; + param.length = seg->mr_len; + param.send_flags = 0; + param.mw_access_flags = mem_priv; + + DECR_CQCOUNT(&r_xprt->rx_ep); + rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); + if (rc) { + dprintk("RPC: %s: failed ib_bind_mw " + "%u@0x%llx status %i\n", + __func__, seg->mr_len, + (unsigned long long)seg->mr_dma, rc); + rpcrdma_unmap_one(ia, seg); + } else { + seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; + seg->mr_base = param.addr; + seg->mr_nsegs = 1; + } + return rc; +} + +static int +rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, + struct rpcrdma_ia *ia, + struct rpcrdma_xprt *r_xprt, void **r) +{ + struct ib_mw_bind param; + LIST_HEAD(l); + int rc; + + BUG_ON(seg->mr_nsegs != 1); + param.mr = ia->ri_bind_mem; + param.addr = 0ULL; /* unbind */ + param.length = 0; + param.mw_access_flags = 0; + if (*r) { + param.wr_id = (u64) (unsigned long) *r; + param.send_flags = IB_SEND_SIGNALED; + INIT_CQCOUNT(&r_xprt->rx_ep); + } else { + param.wr_id = 0ULL; + param.send_flags = 0; + DECR_CQCOUNT(&r_xprt->rx_ep); + } + rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); + rpcrdma_unmap_one(ia, seg); + if (rc) + dprintk("RPC: %s: failed ib_(un)bind_mw," + " status %i\n", __func__, rc); + else + *r = NULL; /* will upcall on completion */ + return rc; +} + +static int +rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg, + int *nsegs, int writing, struct rpcrdma_ia *ia) +{ + int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : + IB_ACCESS_REMOTE_READ); + struct rpcrdma_mr_seg *seg1 = seg; + struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; + int len, i, rc = 0; + + if (*nsegs > RPCRDMA_MAX_DATA_SEGS) + *nsegs = RPCRDMA_MAX_DATA_SEGS; + for (len = 0, i = 0; i < *nsegs;) { + rpcrdma_map_one(ia, seg, writing); + ipb[i].addr = seg->mr_dma; + ipb[i].size = seg->mr_len; + len += seg->mr_len; + ++seg; + ++i; + /* Check for holes */ + if ((i < *nsegs && offset_in_page(seg->mr_offset)) || + offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) + break; + } + seg1->mr_base = seg1->mr_dma; + seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, + ipb, i, mem_priv, &seg1->mr_base); + if (IS_ERR(seg1->mr_chunk.rl_mr)) { + rc = PTR_ERR(seg1->mr_chunk.rl_mr); + dprintk("RPC: %s: failed ib_reg_phys_mr " + "%u@0x%llx (%d)... status %i\n", + __func__, len, + (unsigned long long)seg1->mr_dma, i, rc); + while (i--) + rpcrdma_unmap_one(ia, --seg); + } else { + seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; + seg1->mr_nsegs = i; + seg1->mr_len = len; + } + *nsegs = i; + return rc; +} + +static int +rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg, + struct rpcrdma_ia *ia) +{ + struct rpcrdma_mr_seg *seg1 = seg; + int rc; + + rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); + seg1->mr_chunk.rl_mr = NULL; + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); + if (rc) + dprintk("RPC: %s: failed ib_dereg_mr," + " status %i\n", __func__, rc); + return rc; +} + int rpcrdma_register_external(struct rpcrdma_mr_seg *seg, int nsegs, int writing, struct rpcrdma_xprt *r_xprt) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - int mem_priv = (writing ? 
IB_ACCESS_REMOTE_WRITE : - IB_ACCESS_REMOTE_READ); - struct rpcrdma_mr_seg *seg1 = seg; - int i; int rc = 0; switch (ia->ri_memreg_strategy) { @@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, break; #endif - /* Registration using fast memory registration */ + /* Registration using frmr registration */ + case RPCRDMA_FRMR: + rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); + break; + + /* Registration using fmr memory registration */ case RPCRDMA_MTHCAFMR: - { - u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; - int len, pageoff = offset_in_page(seg->mr_offset); - seg1->mr_offset -= pageoff; /* start of page */ - seg1->mr_len += pageoff; - len = -pageoff; - if (nsegs > RPCRDMA_MAX_DATA_SEGS) - nsegs = RPCRDMA_MAX_DATA_SEGS; - for (i = 0; i < nsegs;) { - rpcrdma_map_one(ia, seg, writing); - physaddrs[i] = seg->mr_dma; - len += seg->mr_len; - ++seg; - ++i; - /* Check for holes */ - if ((i < nsegs && offset_in_page(seg->mr_offset)) || - offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) - break; - } - nsegs = i; - rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, - physaddrs, nsegs, seg1->mr_dma); - if (rc) { - dprintk("RPC: %s: failed ib_map_phys_fmr " - "%u@0x%llx+%i (%d)... status %i\n", __func__, - len, (unsigned long long)seg1->mr_dma, - pageoff, nsegs, rc); - while (nsegs--) - rpcrdma_unmap_one(ia, --seg); - } else { - seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; - seg1->mr_base = seg1->mr_dma + pageoff; - seg1->mr_nsegs = nsegs; - seg1->mr_len = len; - } - } + rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); break; /* Registration using memory windows */ case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: - { - struct ib_mw_bind param; - rpcrdma_map_one(ia, seg, writing); - param.mr = ia->ri_bind_mem; - param.wr_id = 0ULL; /* no send cookie */ - param.addr = seg->mr_dma; - param.length = seg->mr_len; - param.send_flags = 0; - param.mw_access_flags = mem_priv; - - DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_bind_mw(ia->ri_id->qp, - seg->mr_chunk.rl_mw->r.mw, ¶m); - if (rc) { - dprintk("RPC: %s: failed ib_bind_mw " - "%u@0x%llx status %i\n", - __func__, seg->mr_len, - (unsigned long long)seg->mr_dma, rc); - rpcrdma_unmap_one(ia, seg); - } else { - seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; - seg->mr_base = param.addr; - seg->mr_nsegs = 1; - nsegs = 1; - } - } + rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt); break; /* Default registration each time */ default: - { - struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; - int len = 0; - if (nsegs > RPCRDMA_MAX_DATA_SEGS) - nsegs = RPCRDMA_MAX_DATA_SEGS; - for (i = 0; i < nsegs;) { - rpcrdma_map_one(ia, seg, writing); - ipb[i].addr = seg->mr_dma; - ipb[i].size = seg->mr_len; - len += seg->mr_len; - ++seg; - ++i; - /* Check for holes */ - if ((i < nsegs && offset_in_page(seg->mr_offset)) || - offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) - break; - } - nsegs = i; - seg1->mr_base = seg1->mr_dma; - seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, - ipb, nsegs, mem_priv, &seg1->mr_base); - if (IS_ERR(seg1->mr_chunk.rl_mr)) { - rc = PTR_ERR(seg1->mr_chunk.rl_mr); - dprintk("RPC: %s: failed ib_reg_phys_mr " - "%u@0x%llx (%d)... 
status %i\n", - __func__, len, - (unsigned long long)seg1->mr_dma, nsegs, rc); - while (nsegs--) - rpcrdma_unmap_one(ia, --seg); - } else { - seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; - seg1->mr_nsegs = nsegs; - seg1->mr_len = len; - } - } + rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); break; } if (rc) @@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, struct rpcrdma_xprt *r_xprt, void *r) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_mr_seg *seg1 = seg; int nsegs = seg->mr_nsegs, rc; switch (ia->ri_memreg_strategy) { @@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, break; #endif + case RPCRDMA_FRMR: + rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); + break; + case RPCRDMA_MTHCAFMR: - { - LIST_HEAD(l); - list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l); - rc = ib_unmap_fmr(&l); - while (seg1->mr_nsegs--) - rpcrdma_unmap_one(ia, seg++); - } - if (rc) - dprintk("RPC: %s: failed ib_unmap_fmr," - " status %i\n", __func__, rc); + rc = rpcrdma_deregister_fmr_external(seg, ia); break; case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: - { - struct ib_mw_bind param; - BUG_ON(nsegs != 1); - param.mr = ia->ri_bind_mem; - param.addr = 0ULL; /* unbind */ - param.length = 0; - param.mw_access_flags = 0; - if (r) { - param.wr_id = (u64) (unsigned long) r; - param.send_flags = IB_SEND_SIGNALED; - INIT_CQCOUNT(&r_xprt->rx_ep); - } else { - param.wr_id = 0ULL; - param.send_flags = 0; - DECR_CQCOUNT(&r_xprt->rx_ep); - } - rc = ib_bind_mw(ia->ri_id->qp, - seg->mr_chunk.rl_mw->r.mw, ¶m); - rpcrdma_unmap_one(ia, seg); - } - if (rc) - dprintk("RPC: %s: failed ib_(un)bind_mw," - " status %i\n", __func__, rc); - else - r = NULL; /* will upcall on completion */ + rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r); break; default: - rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); - seg1->mr_chunk.rl_mr = NULL; - while (seg1->mr_nsegs--) - rpcrdma_unmap_one(ia, seg++); - if (rc) - dprintk("RPC: %s: failed ib_dereg_mr," - " status %i\n", __func__, rc); + rc = rpcrdma_deregister_default_external(seg, ia); break; } if (r) { diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 2427822f8bd..c7a7eba991b 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -51,6 +51,9 @@ #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ +#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ +#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ + /* * Interface Adapter -- one per transport instance */ @@ -58,6 +61,8 @@ struct rpcrdma_ia { struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; struct ib_mr *ri_bind_mem; + u32 ri_dma_lkey; + int ri_have_dma_lkey; struct completion ri_done; int ri_async_rc; enum rpcrdma_memreg ri_memreg_strategy; @@ -156,6 +161,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ union { struct ib_mw *mw; struct ib_fmr *fmr; + struct { + struct ib_fast_reg_page_list *fr_pgl; + struct ib_mr *fr_mr; + } frmr; } r; struct list_head mw_list; } *rl_mw; @@ -175,6 +184,7 @@ struct rpcrdma_req { size_t rl_size; /* actual length of buffer */ unsigned int rl_niovs; /* 0, 2 or 4 */ unsigned int rl_nchunks; /* non-zero if chunks */ + unsigned int rl_connect_cookie; /* retry detection */ struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk 
segments */ @@ -198,7 +208,7 @@ struct rpcrdma_buffer { atomic_t rb_credits; /* most recent server credits */ unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ int rb_max_requests;/* client max requests */ - struct list_head rb_mws; /* optional memory windows/fmrs */ + struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ int rb_send_index; struct rpcrdma_req **rb_send_bufs; int rb_recv_index; @@ -273,6 +283,11 @@ struct rpcrdma_xprt { #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) +/* Setting this to 0 ensures interoperability with early servers. + * Setting this to 1 enhances certain unaligned read/write performance. + * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ +extern int xprt_rdma_pad_optimize; + /* * Interface Adapter calls - xprtrdma/verbs.c */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 24fdd4cd22c..5031db7b275 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -184,7 +184,8 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, if (result) goto out_unlock; - if (!debugfs_rename(rdev->wiphy.debugfsdir->d_parent, + if (rdev->wiphy.debugfsdir && + !debugfs_rename(rdev->wiphy.debugfsdir->d_parent, rdev->wiphy.debugfsdir, rdev->wiphy.debugfsdir->d_parent, newname)) @@ -317,6 +318,8 @@ int wiphy_register(struct wiphy *wiphy) drv->wiphy.debugfsdir = debugfs_create_dir(wiphy_name(&drv->wiphy), ieee80211_debugfs_dir); + if (IS_ERR(drv->wiphy.debugfsdir)) + drv->wiphy.debugfsdir = NULL; res = 0; out_unlock: |
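
A note on the mac80211 ieee802_11_parse_elems() change above: the parser now stores the HT capability and HT extra-info elements as typed struct pointers, and only when the element length covers the whole structure, which is why the separate *_len fields and the casts at the call sites in mlme.c can be dropped. The fragment below is a simplified, self-contained sketch of that validate-then-type pattern; the struct layout and element ID are placeholders for the demo, not the 802.11 definitions.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder layout for the demo only -- not the real 802.11 HT struct. */
struct demo_ht_cap {
        uint16_t cap_info;
        uint8_t  ampdu_params;
        uint8_t  supp_mcs[16];
};

struct demo_elems {
        const struct demo_ht_cap *ht_cap_elem;  /* NULL if absent or truncated */
};

enum { DEMO_EID_HT_CAPABILITY = 45 };           /* element ID used by the demo */

static void demo_parse_elem(struct demo_elems *elems, uint8_t id,
                            const uint8_t *pos, size_t elen)
{
        switch (id) {
        case DEMO_EID_HT_CAPABILITY:
                /* Expose a typed pointer only when the element is long enough
                 * to contain the whole structure; callers then need neither a
                 * length field nor an explicit cast. */
                if (elen >= sizeof(struct demo_ht_cap))
                        elems->ht_cap_elem = (const void *)pos;
                break;
        default:
                break;
        }
}

int main(void)
{
        uint8_t short_elem[4] = { 0 };
        uint8_t full_elem[sizeof(struct demo_ht_cap)] = { 0 };
        struct demo_elems elems = { NULL };

        demo_parse_elem(&elems, DEMO_EID_HT_CAPABILITY,
                        short_elem, sizeof(short_elem));
        printf("truncated element accepted: %s\n",
               elems.ht_cap_elem ? "yes" : "no");

        demo_parse_elem(&elems, DEMO_EID_HT_CAPABILITY,
                        full_elem, sizeof(full_elem));
        printf("full element accepted:      %s\n",
               elems.ht_cap_elem ? "yes" : "no");
        return 0;
}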
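
The nfnetlink and ctnetlink hunks above work as a pair: ctnetlink_parse_nat_setup() drops the nfnl mutex and RCU read lock to request_module("nf-nat-ipv4"), and if the hook turns out to be registered afterwards it returns -EAGAIN so that nfnetlink_rcv_msg() re-resolves the subsystem at its new replay: label and runs the handler again with the NAT parser in place. The userspace sketch below (stub names, no locking) only illustrates that retry-after-modprobe control flow; it is not the kernel code.

#include <errno.h>
#include <stdio.h>

static int nat_hook_registered;  /* stands in for nfnetlink_parse_nat_setup_hook */

/* Pretend module load: registering the hook is its side effect. */
static void demo_request_module(void)
{
        nat_hook_registered = 1;
}

/* Rough analogue of ctnetlink_parse_nat_setup(). */
static int demo_handle_msg(void)
{
        if (!nat_hook_registered) {
                /* In the kernel this happens with nfnl_unlock() and
                 * rcu_read_unlock() around it; here we just "load" it. */
                demo_request_module();
                /* The hook may be present now, but this message was dispatched
                 * under the old state, so ask the dispatcher to start over. */
                return nat_hook_registered ? -EAGAIN : -EOPNOTSUPP;
        }
        printf("NAT setup parsed by the freshly loaded module\n");
        return 0;
}

/* Rough analogue of the replay loop added to nfnetlink_rcv_msg(). */
static int demo_dispatch(void)
{
        int err;
replay:
        err = demo_handle_msg();
        if (err == -EAGAIN)
                goto replay;
        return err;
}

int main(void)
{
        return demo_dispatch() ? 1 : 0;
}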
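
The xt_iprange change above is a correctness fix rather than a cleanup: m holds a 0-or-1 "outside the range" result, while IPRANGE_SRC_INV / IPRANGE_DST_INV are flag bits well above bit 0, so XOR-ing the raw masked flag leaves m nonzero whether or not the address was in range, and an inverted rule can never match. Normalising the flag with !! restores a real 0/1 toggle. The standalone program below reproduces the effect; the flag value is chosen for the demo and the helpers are not kernel functions.

#include <stdbool.h>
#include <stdio.h>

#define DEMO_SRC_INV 0x10       /* demo flag bit -- deliberately not bit 0 */

/* Pre-fix logic: XOR the raw masked flag into a 0/1 result. */
static bool outside_range_buggy(bool outside, unsigned int flags)
{
        bool m = outside;

        m ^= flags & DEMO_SRC_INV;  /* 0 ^ 0x10 and 1 ^ 0x10 are both nonzero */
        return m;
}

/* Post-fix logic from the patch: normalise the flag to 0 or 1 first. */
static bool outside_range_fixed(bool outside, unsigned int flags)
{
        bool m = outside;

        m ^= !!(flags & DEMO_SRC_INV);
        return m;
}

int main(void)
{
        /* With the invert flag set, the buggy form reports "outside" in both
         * cases, so an inverted rule never matches anything. */
        printf("buggy: in-range -> %d, out-of-range -> %d\n",
               outside_range_buggy(false, DEMO_SRC_INV),
               outside_range_buggy(true, DEMO_SRC_INV));
        printf("fixed: in-range -> %d, out-of-range -> %d\n",
               outside_range_fixed(false, DEMO_SRC_INV),
               outside_range_fixed(true, DEMO_SRC_INV));
        return 0;
}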
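
On the xprtrdma side, the rpc_rdma.c hunks let the client omit a small trailing XDR pad on the final chunk when rdma_pad_optimize is enabled, and teach the reply path to reconstruct it: an RDMA_MSG reply takes the received chunk length modulo 4 and zero-fills the remainder into the tail iovec. The short program below only spells out that round-up arithmetic; the function name is invented for the demo.

#include <stdio.h>

/* XDR rounds opaque data up to a 4-byte boundary.  If the sender omits the
 * trailing pad of the last chunk, the receiver can recompute it from the
 * payload length alone, matching the "rdmalen &= 3; rdmalen = 4 - rdmalen"
 * fix-up in the reply handler above. */
static unsigned int xdr_implicit_pad(unsigned int len)
{
        unsigned int rem = len & 3;     /* bytes past the last 4-byte boundary */

        return rem ? 4 - rem : 0;
}

int main(void)
{
        unsigned int len;

        for (len = 1021; len <= 1024; len++)
                printf("payload %u -> implicit pad %u\n",
                       len, xdr_implicit_pad(len));
        return 0;
}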