diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-10-26 14:20:53 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-10-26 14:20:53 +0200 |
commit | e33bae14fd8da449d735552d78e6dd33ece0458c (patch) | |
tree | 048c062c47f7bfdb0ea8ad19c5b5849a58ff3022 /net | |
parent | 7670c7010c7b367ca40c3aba02afb36149764a6e (diff) | |
parent | 14211d026dad4641d4dffd7a4c520bcaa8fd4a65 (diff) |
Merge branch 'for-linus' of git://github.com/ericvh/linux
* 'for-linus' of git://github.com/ericvh/linux:
9p: fix 9p.txt to advertise msize instead of maxdata
net/9p: Convert net/9p protocol dumps to tracepoints
fs/9p: change an int to unsigned int
fs/9p: Cleanup option parsing in 9p
9p: move dereference after NULL check
fs/9p: inode file operation is properly initialized init_special_inode
fs/9p: Update zero-copy implementation in 9p
Diffstat (limited to 'net')
-rw-r--r-- | net/9p/client.c | 469 | ||||
-rw-r--r-- | net/9p/protocol.c | 99 | ||||
-rw-r--r-- | net/9p/protocol.h | 4 | ||||
-rw-r--r-- | net/9p/trans_common.c | 53 | ||||
-rw-r--r-- | net/9p/trans_common.h | 21 | ||||
-rw-r--r-- | net/9p/trans_virtio.c | 319 |
6 files changed, 554 insertions, 411 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index 0505a03c374..854ca7a911c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -38,6 +38,9 @@ #include <net/9p/transport.h> #include "protocol.h" +#define CREATE_TRACE_POINTS +#include <trace/events/9p.h> + /* * Client Option Parsing (code inspired by NFS code) * - a little lazy - parse all client options @@ -123,21 +126,19 @@ static int parse_opts(char *opts, struct p9_client *clnt) options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { - int token; + int token, r; if (!*p) continue; token = match_token(p, tokens, args); - if (token < Opt_trans) { - int r = match_int(&args[0], &option); + switch (token) { + case Opt_msize: + r = match_int(&args[0], &option); if (r < 0) { P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + "integer field, but no integer?\n"); ret = r; continue; } - } - switch (token) { - case Opt_msize: clnt->msize = option; break; case Opt_trans: @@ -203,11 +204,13 @@ free_and_return: * */ -static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) +static struct p9_req_t * +p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) { unsigned long flags; int row, col; struct p9_req_t *req; + int alloc_msize = min(c->msize, max_size); /* This looks up the original request by tag so we know which * buffer to read the data into */ @@ -245,23 +248,10 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) return ERR_PTR(-ENOMEM); } init_waitqueue_head(req->wq); - if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) { - int alloc_msize = min(c->msize, 4096); - req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, - GFP_NOFS); - req->tc->capacity = alloc_msize; - req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, - GFP_NOFS); - req->rc->capacity = alloc_msize; - } else { - req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_NOFS); - req->tc->capacity = c->msize; - req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_NOFS); - req->rc->capacity = c->msize; - } + req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, + GFP_NOFS); + req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, + GFP_NOFS); if ((!req->tc) || (!req->rc)) { printk(KERN_ERR "Couldn't grow tag array\n"); kfree(req->tc); @@ -271,6 +261,8 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) req->wq = NULL; return ERR_PTR(-ENOMEM); } + req->tc->capacity = alloc_msize; + req->rc->capacity = alloc_msize; req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); } @@ -475,37 +467,22 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) int ecode; err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + /* + * dump the response from server + * This should be after check errors which poplulate pdu_fcall. + */ + trace_9p_protocol_dump(c, req->rc); if (err) { P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); return err; } - if (type != P9_RERROR && type != P9_RLERROR) return 0; if (!p9_is_proto_dotl(c)) { char *ename; - - if (req->tc->pbuf_size) { - /* Handle user buffers */ - size_t len = req->rc->size - req->rc->offset; - if (req->tc->pubuf) { - /* User Buffer */ - err = copy_from_user( - &req->rc->sdata[req->rc->offset], - req->tc->pubuf, len); - if (err) { - err = -EFAULT; - goto out_err; - } - } else { - /* Kernel Buffer */ - memmove(&req->rc->sdata[req->rc->offset], - req->tc->pkbuf, len); - } - } err = p9pdu_readf(req->rc, c->proto_version, "s?d", - &ename, &ecode); + &ename, &ecode); if (err) goto out_err; @@ -515,11 +492,10 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) if (!err || !IS_ERR_VALUE(err)) { err = p9_errstr2errno(ename, strlen(ename)); - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, - ename); - - kfree(ename); + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); } + kfree(ename); } else { err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); err = -ecode; @@ -527,7 +503,6 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); } - return err; out_err: @@ -536,6 +511,115 @@ out_err: return err; } +/** + * p9_check_zc_errors - check 9p packet for error return and process it + * @c: current client instance + * @req: request to parse and check for error conditions + * @in_hdrlen: Size of response protocol buffer. + * + * returns error code if one is discovered, otherwise returns 0 + * + * this will have to be more complicated if we have multiple + * error packet types + */ + +static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, + char *uidata, int in_hdrlen, int kern_buf) +{ + int err; + int ecode; + int8_t type; + char *ename = NULL; + + err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + /* + * dump the response from server + * This should be after parse_header which poplulate pdu_fcall. + */ + trace_9p_protocol_dump(c, req->rc); + if (err) { + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + return err; + } + + if (type != P9_RERROR && type != P9_RLERROR) + return 0; + + if (!p9_is_proto_dotl(c)) { + /* Error is reported in string format */ + uint16_t len; + /* 7 = header size for RERROR, 2 is the size of string len; */ + int inline_len = in_hdrlen - (7 + 2); + + /* Read the size of error string */ + err = p9pdu_readf(req->rc, c->proto_version, "w", &len); + if (err) + goto out_err; + + ename = kmalloc(len + 1, GFP_NOFS); + if (!ename) { + err = -ENOMEM; + goto out_err; + } + if (len <= inline_len) { + /* We have error in protocol buffer itself */ + if (pdu_read(req->rc, ename, len)) { + err = -EFAULT; + goto out_free; + + } + } else { + /* + * Part of the data is in user space buffer. + */ + if (pdu_read(req->rc, ename, inline_len)) { + err = -EFAULT; + goto out_free; + + } + if (kern_buf) { + memcpy(ename + inline_len, uidata, + len - inline_len); + } else { + err = copy_from_user(ename + inline_len, + uidata, len - inline_len); + if (err) { + err = -EFAULT; + goto out_free; + } + } + } + ename[len] = 0; + if (p9_is_proto_dotu(c)) { + /* For dotu we also have error code */ + err = p9pdu_readf(req->rc, + c->proto_version, "d", &ecode); + if (err) + goto out_free; + err = -ecode; + } + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); + + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); + } + kfree(ename); + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } + return err; + +out_free: + kfree(ename); +out_err: + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + return err; +} + static struct p9_req_t * p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); @@ -579,23 +663,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) return 0; } -/** - * p9_client_rpc - issue a request and wait for a response - * @c: client session - * @type: type of request - * @fmt: protocol format string (see protocol.c) - * - * Returns request structure (which client must free using p9_free_req) - */ - -static struct p9_req_t * -p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, + int8_t type, int req_size, + const char *fmt, va_list ap) { - va_list ap; int tag, err; struct p9_req_t *req; - unsigned long flags; - int sigpending; P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); @@ -607,12 +680,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) return ERR_PTR(-EIO); - if (signal_pending(current)) { - sigpending = 1; - clear_thread_flag(TIF_SIGPENDING); - } else - sigpending = 0; - tag = P9_NOTAG; if (type != P9_TVERSION) { tag = p9_idpool_get(c->tagpool); @@ -620,18 +687,51 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) return ERR_PTR(-ENOMEM); } - req = p9_tag_alloc(c, tag); + req = p9_tag_alloc(c, tag, req_size); if (IS_ERR(req)) return req; /* marshall the data */ p9pdu_prepare(req->tc, tag, type); - va_start(ap, fmt); err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); - va_end(ap); if (err) goto reterr; - p9pdu_finalize(req->tc); + p9pdu_finalize(c, req->tc); + trace_9p_client_req(c, type, tag); + return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(err); +} + +/** + * p9_client_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ + +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + req = p9_client_prepare_req(c, type, c->msize, fmt, ap); + va_end(ap); + if (IS_ERR(req)) + return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; err = c->trans_mod->request(c, req); if (err < 0) { @@ -639,18 +739,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) c->status = Disconnected; goto reterr; } - - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); + /* Wait for the response */ err = wait_event_interruptible(*req->wq, - req->status >= REQ_STATUS_RCVD); - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n", - req->wq, tag, err); + req->status >= REQ_STATUS_RCVD); if (req->status == REQ_STATUS_ERROR) { P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } - if ((err == -ERESTARTSYS) && (c->status == Connected)) { P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; @@ -663,25 +759,102 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if (req->status == REQ_STATUS_RCVD) err = 0; } - if (sigpending) { spin_lock_irqsave(¤t->sighand->siglock, flags); recalc_sigpending(); spin_unlock_irqrestore(¤t->sighand->siglock, flags); } - if (err < 0) goto reterr; err = p9_check_errors(c, req); - if (!err) { - P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); + trace_9p_client_res(c, type, req->rc->tag, err); + if (!err) + return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(err); +} + +/** + * p9_client_zc_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ +static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, + char *uidata, char *uodata, + int inlen, int olen, int in_hdrlen, + int kern_buf, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + /* + * We allocate a inline protocol data of only 4k bytes. + * The actual content is passed in zero-copy fashion. + */ + req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap); + va_end(ap); + if (IS_ERR(req)) return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; + + /* If we are called with KERNEL_DS force kern_buf */ + if (segment_eq(get_fs(), KERNEL_DS)) + kern_buf = 1; + + err = c->trans_mod->zc_request(c, req, uidata, uodata, + inlen, olen, in_hdrlen, kern_buf); + if (err < 0) { + if (err == -EIO) + c->status = Disconnected; + goto reterr; + } + if (req->status == REQ_STATUS_ERROR) { + P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + err = req->t_err; + } + if ((err == -ERESTARTSYS) && (c->status == Connected)) { + P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + + if (c->trans_mod->cancel(c, req)) + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ + if (req->status == REQ_STATUS_RCVD) + err = 0; + } + if (sigpending) { + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); } + if (err < 0) + goto reterr; + err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf); + trace_9p_client_res(c, type, req->rc->tag, err); + if (!err) + return req; reterr: - P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, - err); p9_free_req(c, req); return ERR_PTR(err); } @@ -769,7 +942,7 @@ static int p9_client_version(struct p9_client *c) err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version); if (err) { P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(c, req->rc); goto error; } @@ -906,15 +1079,14 @@ EXPORT_SYMBOL(p9_client_begin_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, char *uname, u32 n_uname, char *aname) { - int err; + int err = 0; struct p9_req_t *req; struct p9_fid *fid; struct p9_qid qid; - P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", - afid ? afid->fid : -1, uname, aname); - err = 0; + P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", + afid ? afid->fid : -1, uname, aname); fid = p9_fid_create(clnt); if (IS_ERR(fid)) { err = PTR_ERR(fid); @@ -931,7 +1103,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } @@ -991,7 +1163,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto clunk_fid; } @@ -1058,7 +1230,7 @@ int p9_client_open(struct p9_fid *fid, int mode) err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1101,7 +1273,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1146,7 +1318,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1185,7 +1357,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1330,13 +1502,15 @@ int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count) { - int err, rsize; - struct p9_client *clnt; - struct p9_req_t *req; char *dataptr; + int kernel_buf = 0; + struct p9_req_t *req; + struct p9_client *clnt; + int err, rsize, non_zc = 0; + - P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, - (long long unsigned) offset, count); + P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", + fid->fid, (long long unsigned) offset, count); err = 0; clnt = fid->clnt; @@ -1348,13 +1522,24 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, rsize = count; /* Don't bother zerocopy for small IO (< 1024) */ - if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { - req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, - rsize, data, udata); + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *indata; + if (data) { + kernel_buf = 1; + indata = data; + } else + indata = (char *)udata; + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0, + 11, kernel_buf, "dqd", fid->fid, + offset, rsize); } else { + non_zc = 1; req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, - rsize); + rsize); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1363,14 +1548,13 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - P9_DUMP_PKT(1, req->rc); - if (!req->tc->pbuf_size) { + if (non_zc) { if (data) { memmove(data, dataptr, count); } else { @@ -1396,6 +1580,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, u64 offset, u32 count) { int err, rsize; + int kernel_buf = 0; struct p9_client *clnt; struct p9_req_t *req; @@ -1411,19 +1596,24 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, if (count < rsize) rsize = count; - /* Don't bother zerocopy form small IO (< 1024) */ - if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { - req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset, - rsize, data, udata); + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *odata; + if (data) { + kernel_buf = 1; + odata = data; + } else + odata = (char *)udata; + req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize, + P9_ZC_HDR_SZ, kernel_buf, "dqd", + fid->fid, offset, rsize); } else { - if (data) req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, - offset, rsize, data); + offset, rsize, data); else req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, - offset, rsize, udata); + offset, rsize, udata); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1432,7 +1622,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1472,7 +1662,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } @@ -1523,7 +1713,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } @@ -1671,7 +1861,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail, &sb->files, &sb->ffree, &sb->fsid, &sb->namelen); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } @@ -1778,7 +1968,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, } err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto clunk_fid; } @@ -1824,7 +2014,7 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) { - int err, rsize; + int err, rsize, non_zc = 0; struct p9_client *clnt; struct p9_req_t *req; char *dataptr; @@ -1842,13 +2032,18 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) if (count < rsize) rsize = count; - if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) { - req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, - offset, rsize, data); + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0, + 11, 1, "dqd", fid->fid, offset, rsize); } else { + non_zc = 1; req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, - offset, rsize); + offset, rsize); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1857,13 +2052,13 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); - if (!req->tc->pbuf_size && data) + if (non_zc) memmove(data, dataptr, count); p9_free_req(clnt, req); @@ -1894,7 +2089,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, @@ -1925,7 +2120,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, @@ -1960,7 +2155,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) err = p9pdu_readf(req->rc, clnt->proto_version, "b", status); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); @@ -1993,7 +2188,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) &glock->start, &glock->length, &glock->proc_id, &glock->client_id); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " @@ -2021,7 +2216,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target) err = p9pdu_readf(req->rc, clnt->proto_version, "s", target); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index df58375ea6b..55e10a96c90 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -37,40 +37,11 @@ #include <net/9p/client.h> #include "protocol.h" +#include <trace/events/9p.h> + static int p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); -#ifdef CONFIG_NET_9P_DEBUG -void -p9pdu_dump(int way, struct p9_fcall *pdu) -{ - int len = pdu->size; - - if ((p9_debug_level & P9_DEBUG_VPKT) != P9_DEBUG_VPKT) { - if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT) { - if (len > 32) - len = 32; - } else { - /* shouldn't happen */ - return; - } - } - - if (way) - print_hex_dump_bytes("[9P] ", DUMP_PREFIX_OFFSET, pdu->sdata, - len); - else - print_hex_dump_bytes("]9P[ ", DUMP_PREFIX_OFFSET, pdu->sdata, - len); -} -#else -void -p9pdu_dump(int way, struct p9_fcall *pdu) -{ -} -#endif -EXPORT_SYMBOL(p9pdu_dump); - void p9stat_free(struct p9_wstat *stbuf) { kfree(stbuf->name); @@ -81,7 +52,7 @@ void p9stat_free(struct p9_wstat *stbuf) } EXPORT_SYMBOL(p9stat_free); -static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) { size_t len = min(pdu->size - pdu->offset, size); memcpy(data, &pdu->sdata[pdu->offset], len); @@ -108,26 +79,6 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) return size - len; } -static size_t -pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata, - size_t size) -{ - BUG_ON(pdu->size > P9_IOHDRSZ); - pdu->pubuf = (char __user *)udata; - pdu->pkbuf = (char *)kdata; - pdu->pbuf_size = size; - return 0; -} - -static size_t -pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size) -{ - BUG_ON(pdu->size > P9_READDIRHDRSZ); - pdu->pkbuf = (char *)kdata; - pdu->pbuf_size = size; - return 0; -} - /* b - int8_t w - int16_t @@ -459,26 +410,6 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, errcode = -EFAULT; } break; - case 'E':{ - int32_t cnt = va_arg(ap, int32_t); - const char *k = va_arg(ap, const void *); - const char __user *u = va_arg(ap, - const void __user *); - errcode = p9pdu_writef(pdu, proto_version, "d", - cnt); - if (!errcode && pdu_write_urw(pdu, k, u, cnt)) - errcode = -EFAULT; - } - break; - case 'F':{ - int32_t cnt = va_arg(ap, int32_t); - const char *k = va_arg(ap, const void *); - errcode = p9pdu_writef(pdu, proto_version, "d", - cnt); - if (!errcode && pdu_write_readdir(pdu, k, cnt)) - errcode = -EFAULT; - } - break; case 'U':{ int32_t count = va_arg(ap, int32_t); const char __user *udata = @@ -591,7 +522,7 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) return ret; } -int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) +int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st) { struct p9_fcall fake_pdu; int ret; @@ -601,10 +532,10 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, proto_version, "S", st); + ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "S", st); if (ret) { P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); - P9_DUMP_PKT(0, &fake_pdu); + trace_9p_protocol_dump(clnt, &fake_pdu); } return ret; @@ -617,7 +548,7 @@ int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); } -int p9pdu_finalize(struct p9_fcall *pdu) +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu) { int size = pdu->size; int err; @@ -626,7 +557,7 @@ int p9pdu_finalize(struct p9_fcall *pdu) err = p9pdu_writef(pdu, 0, "d", size); pdu->size = size; - P9_DUMP_PKT(0, pdu); + trace_9p_protocol_dump(clnt, pdu); P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size, pdu->id, pdu->tag); @@ -637,14 +568,10 @@ void p9pdu_reset(struct p9_fcall *pdu) { pdu->offset = 0; pdu->size = 0; - pdu->private = NULL; - pdu->pubuf = NULL; - pdu->pkbuf = NULL; - pdu->pbuf_size = 0; } -int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, - int proto_version) +int p9dirent_read(struct p9_client *clnt, char *buf, int len, + struct p9_dirent *dirent) { struct p9_fcall fake_pdu; int ret; @@ -655,11 +582,11 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid, - &dirent->d_off, &dirent->d_type, &nameptr); + ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "Qqbs", &dirent->qid, + &dirent->d_off, &dirent->d_type, &nameptr); if (ret) { P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); - P9_DUMP_PKT(1, &fake_pdu); + trace_9p_protocol_dump(clnt, &fake_pdu); goto out; } diff --git a/net/9p/protocol.h b/net/9p/protocol.h index 2431c0f38d5..2cc525fa49f 100644 --- a/net/9p/protocol.h +++ b/net/9p/protocol.h @@ -29,6 +29,6 @@ int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, va_list ap); int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); -int p9pdu_finalize(struct p9_fcall *pdu); -void p9pdu_dump(int, struct p9_fcall *); +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu); void p9pdu_reset(struct p9_fcall *pdu); +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size); diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index 9a70ebdec56..de8df957867 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -21,30 +21,25 @@ /** * p9_release_req_pages - Release pages after the transaction. - * @*private: PDU's private page of struct trans_rpage_info */ -void -p9_release_req_pages(struct trans_rpage_info *rpinfo) +void p9_release_pages(struct page **pages, int nr_pages) { int i = 0; - - while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) { - put_page(rpinfo->rp_data[i]); + while (pages[i] && nr_pages--) { + put_page(pages[i]); i++; } } -EXPORT_SYMBOL(p9_release_req_pages); +EXPORT_SYMBOL(p9_release_pages); /** * p9_nr_pages - Return number of pages needed to accommodate the payload. */ -int -p9_nr_pages(struct p9_req_t *req) +int p9_nr_pages(char *data, int len) { unsigned long start_page, end_page; - start_page = (unsigned long)req->tc->pubuf >> PAGE_SHIFT; - end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size + - PAGE_SIZE - 1) >> PAGE_SHIFT; + start_page = (unsigned long)data >> PAGE_SHIFT; + end_page = ((unsigned long)data + len + PAGE_SIZE - 1) >> PAGE_SHIFT; return end_page - start_page; } EXPORT_SYMBOL(p9_nr_pages); @@ -58,35 +53,17 @@ EXPORT_SYMBOL(p9_nr_pages); * @nr_pages: number of pages to accommodate the payload * @rw: Indicates if the pages are for read or write. */ -int -p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, - int nr_pages, u8 rw) -{ - uint32_t first_page_bytes = 0; - int32_t pdata_mapped_pages; - struct trans_rpage_info *rpinfo; - - *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1); - if (*pdata_off) - first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off), - req->tc->pbuf_size); +int p9_payload_gup(char *data, int *nr_pages, struct page **pages, int write) +{ + int nr_mapped_pages; - rpinfo = req->tc->private; - pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, - nr_pages, rw, &rpinfo->rp_data[0]); - if (pdata_mapped_pages <= 0) - return pdata_mapped_pages; + nr_mapped_pages = get_user_pages_fast((unsigned long)data, + *nr_pages, write, pages); + if (nr_mapped_pages <= 0) + return nr_mapped_pages; - rpinfo->rp_nr_pages = pdata_mapped_pages; - if (*pdata_off) { - *pdata_len = first_page_bytes; - *pdata_len += min((req->tc->pbuf_size - *pdata_len), - ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT); - } else { - *pdata_len = min(req->tc->pbuf_size, - (size_t)pdata_mapped_pages << PAGE_SHIFT); - } + *nr_pages = nr_mapped_pages; return 0; } EXPORT_SYMBOL(p9_payload_gup); diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h index 76309223bb0..173bb550a9e 100644 --- a/net/9p/trans_common.h +++ b/net/9p/trans_common.h @@ -12,21 +12,6 @@ * */ -/* TRUE if it is user context */ -#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS)) - -/** - * struct trans_rpage_info - To store mapped page information in PDU. - * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu. - * @rp_nr_pages: Number of mapped pages - * @rp_data: Array of page pointers - */ -struct trans_rpage_info { - u8 rp_alloc; - int rp_nr_pages; - struct page *rp_data[0]; -}; - -void p9_release_req_pages(struct trans_rpage_info *); -int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8); -int p9_nr_pages(struct p9_req_t *); +void p9_release_pages(struct page **, int); +int p9_payload_gup(char *, int *, struct page **, int); +int p9_nr_pages(char *, int); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index e317583fcc7..32aa9834229 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -150,12 +150,10 @@ static void req_done(struct virtqueue *vq) while (1) { spin_lock_irqsave(&chan->lock, flags); rc = virtqueue_get_buf(chan->vq, &len); - if (rc == NULL) { spin_unlock_irqrestore(&chan->lock, flags); break; } - chan->ring_bufs_avail = 1; spin_unlock_irqrestore(&chan->lock, flags); /* Wakeup if anyone waiting for VirtIO ring space. */ @@ -163,17 +161,6 @@ static void req_done(struct virtqueue *vq) P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); req = p9_tag_lookup(chan->client, rc->tag); - if (req->tc->private) { - struct trans_rpage_info *rp = req->tc->private; - int p = rp->rp_nr_pages; - /*Release pages */ - p9_release_req_pages(rp); - atomic_sub(p, &vp_pinned); - wake_up(&vp_wq); - if (rp->rp_alloc) - kfree(rp); - req->tc->private = NULL; - } req->status = REQ_STATUS_RCVD; p9_client_cb(chan->client, req); } @@ -193,9 +180,8 @@ static void req_done(struct virtqueue *vq) * */ -static int -pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, - int count) +static int pack_sg_list(struct scatterlist *sg, int start, + int limit, char *data, int count) { int s; int index = start; @@ -224,31 +210,36 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) * this takes a list of pages. * @sg: scatter/gather list to pack into * @start: which segment of the sg_list to start at - * @pdata_off: Offset into the first page * @**pdata: a list of pages to add into sg. + * @nr_pages: number of pages to pack into the scatter/gather list + * @data: data to pack into scatter/gather list * @count: amount of data to pack into the scatter/gather list */ static int -pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, - struct page **pdata, int count) +pack_sg_list_p(struct scatterlist *sg, int start, int limit, + struct page **pdata, int nr_pages, char *data, int count) { - int s; - int i = 0; + int i = 0, s; + int data_off; int index = start; - if (pdata_off) { - s = min((int)(PAGE_SIZE - pdata_off), count); - sg_set_page(&sg[index++], pdata[i++], s, pdata_off); - count -= s; - } - - while (count) { - BUG_ON(index > limit); - s = min((int)PAGE_SIZE, count); - sg_set_page(&sg[index++], pdata[i++], s, 0); + BUG_ON(nr_pages > (limit - start)); + /* + * if the first page doesn't start at + * page boundary find the offset + */ + data_off = offset_in_page(data); + while (nr_pages) { + s = rest_of_page(data); + if (s > count) + s = count; + sg_set_page(&sg[index++], pdata[i++], s, data_off); + data_off = 0; + data += s; count -= s; + nr_pages--; } - return index-start; + return index - start; } /** @@ -261,114 +252,166 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, static int p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { - int in, out, inp, outp; - struct virtio_chan *chan = client->trans; + int err; + int in, out; unsigned long flags; - size_t pdata_off = 0; - struct trans_rpage_info *rpinfo = NULL; - int err, pdata_len = 0; + struct virtio_chan *chan = client->trans; P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); req->status = REQ_STATUS_SENT; +req_retry: + spin_lock_irqsave(&chan->lock, flags); + + /* Handle out VirtIO ring buffers */ + out = pack_sg_list(chan->sg, 0, + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); - if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { - int nr_pages = p9_nr_pages(req); - int rpinfo_size = sizeof(struct trans_rpage_info) + - sizeof(struct page *) * nr_pages; + in = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); - if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { - err = wait_event_interruptible(vp_wq, - atomic_read(&vp_pinned) < chan->p9_max_pages); + err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); + if (err < 0) { + if (err == -ENOSPC) { + chan->ring_bufs_avail = 0; + spin_unlock_irqrestore(&chan->lock, flags); + err = wait_event_interruptible(*chan->vc_wq, + chan->ring_bufs_avail); if (err == -ERESTARTSYS) return err; - P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n"); - } - if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { - /* We can use sdata */ - req->tc->private = req->tc->sdata + req->tc->size; - rpinfo = (struct trans_rpage_info *)req->tc->private; - rpinfo->rp_alloc = 0; + P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); + goto req_retry; } else { - req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); - if (!req->tc->private) { - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " - "private kmalloc returned NULL"); - return -ENOMEM; - } - rpinfo = (struct trans_rpage_info *)req->tc->private; - rpinfo->rp_alloc = 1; + spin_unlock_irqrestore(&chan->lock, flags); + P9_DPRINTK(P9_DEBUG_TRANS, + "9p debug: " + "virtio rpc add_buf returned failure"); + return -EIO; } + } + virtqueue_kick(chan->vq); + spin_unlock_irqrestore(&chan->lock, flags); - err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, - req->tc->id == P9_TREAD ? 1 : 0); - if (err < 0) { - if (rpinfo->rp_alloc) - kfree(rpinfo); + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); + return 0; +} + +static int p9_get_mapped_pages(struct virtio_chan *chan, + struct page **pages, char *data, + int nr_pages, int write, int kern_buf) +{ + int err; + if (!kern_buf) { + /* + * We allow only p9_max_pages pinned. We wait for the + * Other zc request to finish here + */ + if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { + err = wait_event_interruptible(vp_wq, + (atomic_read(&vp_pinned) < chan->p9_max_pages)); + if (err == -ERESTARTSYS) + return err; + } + err = p9_payload_gup(data, &nr_pages, pages, write); + if (err < 0) return err; - } else { - atomic_add(rpinfo->rp_nr_pages, &vp_pinned); + atomic_add(nr_pages, &vp_pinned); + } else { + /* kernel buffer, no need to pin pages */ + int s, index = 0; + int count = nr_pages; + while (nr_pages) { + s = rest_of_page(data); + pages[index++] = virt_to_page(data); + data += s; + nr_pages--; } + nr_pages = count; } + return nr_pages; +} -req_retry_pinned: - spin_lock_irqsave(&chan->lock, flags); +/** + * p9_virtio_zc_request - issue a zero copy request + * @client: client instance issuing the request + * @req: request to be issued + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * + */ +static int +p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, + char *uidata, char *uodata, int inlen, + int outlen, int in_hdr_len, int kern_buf) +{ + int in, out, err; + unsigned long flags; + int in_nr_pages = 0, out_nr_pages = 0; + struct page **in_pages = NULL, **out_pages = NULL; + struct virtio_chan *chan = client->trans; - /* Handle out VirtIO ring buffers */ - out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, - req->tc->size); - - if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { - /* We have additional write payload buffer to take care */ - if (req->tc->pubuf && P9_IS_USER_CONTEXT) { - outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, - pdata_off, rpinfo->rp_data, pdata_len); - } else { - char *pbuf; - if (req->tc->pubuf) - pbuf = (__force char *) req->tc->pubuf; - else - pbuf = req->tc->pkbuf; - outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, - req->tc->pbuf_size); + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); + + if (uodata) { + out_nr_pages = p9_nr_pages(uodata, outlen); + out_pages = kmalloc(sizeof(struct page *) * out_nr_pages, + GFP_NOFS); + if (!out_pages) { + err = -ENOMEM; + goto err_out; + } + out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata, + out_nr_pages, 0, kern_buf); + if (out_nr_pages < 0) { + err = out_nr_pages; + kfree(out_pages); + out_pages = NULL; + goto err_out; } - out += outp; } - - /* Handle in VirtIO ring buffers */ - if (req->tc->pbuf_size && - ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { - /* - * Take care of additional Read payload. - * 11 is the read/write header = PDU Header(7) + IO Size (4). - * Arrange in such a way that server places header in the - * alloced memory and payload onto the user buffer. - */ - inp = pack_sg_list(chan->sg, out, - VIRTQUEUE_NUM, req->rc->sdata, 11); - /* - * Running executables in the filesystem may result in - * a read request with kernel buffer as opposed to user buffer. - */ - if (req->tc->pubuf && P9_IS_USER_CONTEXT) { - in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, - pdata_off, rpinfo->rp_data, pdata_len); - } else { - char *pbuf; - if (req->tc->pubuf) - pbuf = (__force char *) req->tc->pubuf; - else - pbuf = req->tc->pkbuf; - - in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, - pbuf, req->tc->pbuf_size); + if (uidata) { + in_nr_pages = p9_nr_pages(uidata, inlen); + in_pages = kmalloc(sizeof(struct page *) * in_nr_pages, + GFP_NOFS); + if (!in_pages) { + err = -ENOMEM; + goto err_out; + } + in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata, + in_nr_pages, 1, kern_buf); + if (in_nr_pages < 0) { + err = in_nr_pages; + kfree(in_pages); + in_pages = NULL; + goto err_out; } - in += inp; - } else { - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, - req->rc->sdata, req->rc->capacity); } + req->status = REQ_STATUS_SENT; +req_retry_pinned: + spin_lock_irqsave(&chan->lock, flags); + /* out data */ + out = pack_sg_list(chan->sg, 0, + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); + + if (out_pages) + out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, + out_pages, out_nr_pages, uodata, outlen); + /* + * Take care of in data + * For example TREAD have 11. + * 11 is the read/write header = PDU Header(7) + IO Size (4). + * Arrange in such a way that server places header in the + * alloced memory and payload onto the user buffer. + */ + in = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); + if (in_pages) + in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, + in_pages, in_nr_pages, uidata, inlen); err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); if (err < 0) { @@ -376,28 +419,45 @@ req_retry_pinned: chan->ring_bufs_avail = 0; spin_unlock_irqrestore(&chan->lock, flags); err = wait_event_interruptible(*chan->vc_wq, - chan->ring_bufs_avail); + chan->ring_bufs_avail); if (err == -ERESTARTSYS) - return err; + goto err_out; P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); goto req_retry_pinned; } else { spin_unlock_irqrestore(&chan->lock, flags); P9_DPRINTK(P9_DEBUG_TRANS, - "9p debug: " - "virtio rpc add_buf returned failure"); - if (rpinfo && rpinfo->rp_alloc) - kfree(rpinfo); - return -EIO; + "9p debug: " + "virtio rpc add_buf returned failure"); + err = -EIO; + goto err_out; } } - virtqueue_kick(chan->vq); spin_unlock_irqrestore(&chan->lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); - return 0; + err = wait_event_interruptible(*req->wq, + req->status >= REQ_STATUS_RCVD); + /* + * Non kernel buffers are pinned, unpin them + */ +err_out: + if (!kern_buf) { + if (in_pages) { + p9_release_pages(in_pages, in_nr_pages); + atomic_sub(in_nr_pages, &vp_pinned); + } + if (out_pages) { + p9_release_pages(out_pages, out_nr_pages); + atomic_sub(out_nr_pages, &vp_pinned); + } + /* wakeup anybody waiting for slots to pin pages */ + wake_up(&vp_wq); + } + kfree(in_pages); + kfree(out_pages); + return err; } static ssize_t p9_mount_tag_show(struct device *dev, @@ -591,8 +651,8 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .close = p9_virtio_close, .request = p9_virtio_request, + .zc_request = p9_virtio_zc_request, .cancel = p9_virtio_cancel, - /* * We leave one entry for input and one entry for response * headers. We also skip one more entry to accomodate, address @@ -600,7 +660,6 @@ static struct p9_trans_module p9_virtio_trans = { * page in zero copy. */ .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), - .pref = P9_TRANS_PREF_PAYLOAD_SEP, .def = 0, .owner = THIS_MODULE, }; |