diff options
Diffstat (limited to 'fs/nfs/direct.c')
-rw-r--r-- | fs/nfs/direct.c | 751 |
1 files changed, 355 insertions, 396 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 481be7f7bdd..ad2775d3e21 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -56,6 +56,7 @@ #include "internal.h" #include "iostat.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -81,16 +82,19 @@ struct nfs_direct_req { struct completion completion; /* wait for i/o completion */ /* commit state */ - struct list_head rewrite_list; /* saved nfs_write_data structs */ - struct nfs_write_data * commit_data; /* special write_data for commits */ + struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */ + struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ + struct work_struct work; int flags; #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ struct nfs_writeverf verf; /* unstable write verifier */ }; +static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops; +static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops; static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); -static const struct rpc_call_ops nfs_write_direct_ops; +static void nfs_direct_write_schedule_work(struct work_struct *work); static inline void get_dreq(struct nfs_direct_req *dreq) { @@ -124,22 +128,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ return -EINVAL; } -static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count) -{ - unsigned int npages; - unsigned int i; - - if (count == 0) - return; - pages += (pgbase >> PAGE_SHIFT); - npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; - for (i = 0; i < npages; i++) { - struct page *page = pages[i]; - if (!PageCompound(page)) - set_page_dirty(page); - } -} - static void nfs_direct_release_pages(struct page **pages, unsigned int npages) { unsigned int i; @@ -147,26 +135,30 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages) page_cache_release(pages[i]); } +void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, + struct nfs_direct_req *dreq) +{ + cinfo->lock = &dreq->lock; + cinfo->mds = &dreq->mds_cinfo; + cinfo->ds = &dreq->ds_cinfo; + cinfo->dreq = dreq; + cinfo->completion_ops = &nfs_direct_commit_completion_ops; +} + static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; - dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL); + dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL); if (!dreq) return NULL; kref_init(&dreq->kref); kref_get(&dreq->kref); init_completion(&dreq->completion); - INIT_LIST_HEAD(&dreq->rewrite_list); - dreq->iocb = NULL; - dreq->ctx = NULL; - dreq->l_ctx = NULL; + INIT_LIST_HEAD(&dreq->mds_cinfo.list); + INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); spin_lock_init(&dreq->lock); - atomic_set(&dreq->io_count, 0); - dreq->count = 0; - dreq->error = 0; - dreq->flags = 0; return dreq; } @@ -226,47 +218,80 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) nfs_direct_req_release(dreq); } -/* - * We must hold a reference to all the pages in this direct read request - * until the RPCs complete. This could be long *after* we are woken up in - * nfs_direct_wait (for instance, if someone hits ^C on a slow server). - */ -static void nfs_direct_read_result(struct rpc_task *task, void *calldata) +static void nfs_direct_readpage_release(struct nfs_page *req) { - struct nfs_read_data *data = calldata; - - nfs_readpage_result(task, data); + dprintk("NFS: direct read done (%s/%lld %d@%lld)\n", + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); + nfs_release_request(req); } -static void nfs_direct_read_release(void *calldata) +static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) { + unsigned long bytes = 0; + struct nfs_direct_req *dreq = hdr->dreq; - struct nfs_read_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - int status = data->task.tk_status; + if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) + goto out_put; spin_lock(&dreq->lock); - if (unlikely(status < 0)) { - dreq->error = status; - spin_unlock(&dreq->lock); - } else { - dreq->count += data->res.count; - spin_unlock(&dreq->lock); - nfs_direct_dirty_pages(data->pagevec, - data->args.pgbase, - data->res.count); - } - nfs_direct_release_pages(data->pagevec, data->npages); + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) + dreq->error = hdr->error; + else + dreq->count += hdr->good_bytes; + spin_unlock(&dreq->lock); + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct page *page = req->wb_page; + + if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { + if (bytes > hdr->good_bytes) + zero_user(page, 0, PAGE_SIZE); + else if (hdr->good_bytes - bytes < PAGE_SIZE) + zero_user_segment(page, + hdr->good_bytes & ~PAGE_MASK, + PAGE_SIZE); + } + if (!PageCompound(page)) { + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { + if (bytes < hdr->good_bytes) + set_page_dirty(page); + } else + set_page_dirty(page); + } + bytes += req->wb_bytes; + nfs_list_remove_request(req); + nfs_direct_readpage_release(req); + } +out_put: if (put_dreq(dreq)) nfs_direct_complete(dreq); - nfs_readdata_free(data); + hdr->release(hdr); +} + +static void nfs_read_sync_pgio_error(struct list_head *head) +{ + struct nfs_page *req; + + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_release_request(req); + } } -static const struct rpc_call_ops nfs_read_direct_ops = { - .rpc_call_prepare = nfs_read_prepare, - .rpc_call_done = nfs_direct_read_result, - .rpc_release = nfs_direct_read_release, +static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr) +{ + get_dreq(hdr->dreq); +} + +static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { + .error_cleanup = nfs_read_sync_pgio_error, + .init_hdr = nfs_direct_pgio_init, + .completion = nfs_direct_read_completion, }; /* @@ -276,107 +301,82 @@ static const struct rpc_call_ops nfs_read_direct_ops = { * handled automatically by nfs_direct_read_result(). Otherwise, if * no requests have been sent, just return an error. */ -static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, +static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, loff_t pos) { + struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; unsigned long user_addr = (unsigned long)iov->iov_base; size_t count = iov->iov_len; size_t rsize = NFS_SERVER(inode)->rsize; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_cred = ctx->cred, - }; - struct rpc_task_setup task_setup_data = { - .rpc_client = NFS_CLIENT(inode), - .rpc_message = &msg, - .callback_ops = &nfs_read_direct_ops, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, - }; unsigned int pgbase; int result; ssize_t started = 0; + struct page **pagevec = NULL; + unsigned int npages; do { - struct nfs_read_data *data; size_t bytes; + int i; pgbase = user_addr & ~PAGE_MASK; - bytes = min(rsize,count); + bytes = min(max_t(size_t, rsize, PAGE_SIZE), count); result = -ENOMEM; - data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes)); - if (unlikely(!data)) + npages = nfs_page_array_len(pgbase, bytes); + if (!pagevec) + pagevec = kmalloc(npages * sizeof(struct page *), + GFP_KERNEL); + if (!pagevec) break; - down_read(¤t->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - data->npages, 1, 0, data->pagevec, NULL); + npages, 1, 0, pagevec, NULL); up_read(¤t->mm->mmap_sem); - if (result < 0) { - nfs_readdata_free(data); + if (result < 0) break; - } - if ((unsigned)result < data->npages) { + if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { - nfs_direct_release_pages(data->pagevec, result); - nfs_readdata_free(data); + nfs_direct_release_pages(pagevec, result); break; } bytes -= pgbase; - data->npages = result; + npages = result; } - get_dreq(dreq); - - data->req = (struct nfs_page *) dreq; - data->inode = inode; - data->cred = msg.rpc_cred; - data->args.fh = NFS_FH(inode); - data->args.context = ctx; - data->args.lock_context = dreq->l_ctx; - data->args.offset = pos; - data->args.pgbase = pgbase; - data->args.pages = data->pagevec; - data->args.count = bytes; - data->res.fattr = &data->fattr; - data->res.eof = 0; - data->res.count = bytes; - nfs_fattr_init(&data->fattr); - msg.rpc_argp = &data->args; - msg.rpc_resp = &data->res; - - task_setup_data.task = &data->task; - task_setup_data.callback_data = data; - NFS_PROTO(inode)->read_setup(data, &msg); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - break; - rpc_put_task(task); - - dprintk("NFS: %5u initiated direct read call " - "(req %s/%Ld, %zu bytes @ offset %Lu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - bytes, - (unsigned long long)data->args.offset); - - started += bytes; - user_addr += bytes; - pos += bytes; - /* FIXME: Remove this unnecessary math from final patch */ - pgbase += bytes; - pgbase &= ~PAGE_MASK; - BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); - - count -= bytes; - } while (count != 0); + for (i = 0; i < npages; i++) { + struct nfs_page *req; + unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); + /* XXX do we need to do the eof zeroing found in async_filler? */ + req = nfs_create_request(dreq->ctx, dreq->inode, + pagevec[i], + pgbase, req_len); + if (IS_ERR(req)) { + result = PTR_ERR(req); + break; + } + req->wb_index = pos >> PAGE_SHIFT; + req->wb_offset = pos & ~PAGE_MASK; + if (!nfs_pageio_add_request(desc, req)) { + result = desc->pg_error; + nfs_release_request(req); + break; + } + pgbase = 0; + bytes -= req_len; + started += req_len; + user_addr += req_len; + pos += req_len; + count -= req_len; + } + /* The nfs_page now hold references to these pages */ + nfs_direct_release_pages(pagevec, npages); + } while (count != 0 && result >= 0); + + kfree(pagevec); if (started) return started; @@ -388,15 +388,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, unsigned long nr_segs, loff_t pos) { + struct nfs_pageio_descriptor desc; ssize_t result = -EINVAL; size_t requested_bytes = 0; unsigned long seg; + nfs_pageio_init_read(&desc, dreq->inode, + &nfs_direct_read_completion_ops); get_dreq(dreq); + desc.pg_dreq = dreq; for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_read_schedule_segment(dreq, vec, pos); + result = nfs_direct_read_schedule_segment(&desc, vec, pos); if (result < 0) break; requested_bytes += result; @@ -405,6 +409,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, pos += vec->iov_len; } + nfs_pageio_complete(&desc); + /* * If no bytes were started, return the error, and let the * generic layer handle the completion. @@ -441,104 +447,70 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); if (!result) result = nfs_direct_wait(dreq); + NFS_I(inode)->read_io += result; out_release: nfs_direct_req_release(dreq); out: return result; } -static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) +static void nfs_inode_dio_write_done(struct inode *inode) { - while (!list_empty(&dreq->rewrite_list)) { - struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); - list_del(&data->pages); - nfs_direct_release_pages(data->pagevec, data->npages); - nfs_writedata_free(data); - } + nfs_zap_mapping(inode, inode->i_mapping); + inode_dio_done(inode); } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { - struct inode *inode = dreq->inode; - struct list_head *p; - struct nfs_write_data *data; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_cred = dreq->ctx->cred, - }; - struct rpc_task_setup task_setup_data = { - .rpc_client = NFS_CLIENT(inode), - .rpc_message = &msg, - .callback_ops = &nfs_write_direct_ops, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, - }; + struct nfs_pageio_descriptor desc; + struct nfs_page *req, *tmp; + LIST_HEAD(reqs); + struct nfs_commit_info cinfo; + LIST_HEAD(failed); + + nfs_init_cinfo_from_dreq(&cinfo, dreq); + pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo); + spin_lock(cinfo.lock); + nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0); + spin_unlock(cinfo.lock); dreq->count = 0; get_dreq(dreq); - list_for_each(p, &dreq->rewrite_list) { - data = list_entry(p, struct nfs_write_data, pages); - - get_dreq(dreq); - - /* Use stable writes */ - data->args.stable = NFS_FILE_SYNC; - - /* - * Reset data->res. - */ - nfs_fattr_init(&data->fattr); - data->res.count = data->args.count; - memset(&data->verf, 0, sizeof(data->verf)); - - /* - * Reuse data->task; data->args should not have changed - * since the original request was sent. - */ - task_setup_data.task = &data->task; - task_setup_data.callback_data = data; - msg.rpc_argp = &data->args; - msg.rpc_resp = &data->res; - NFS_PROTO(inode)->write_setup(data, &msg); - - /* - * We're called via an RPC callback, so BKL is already held. - */ - task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task(task); - - dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - data->args.count, - (unsigned long long)data->args.offset); + nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, + &nfs_direct_write_completion_ops); + desc.pg_dreq = dreq; + + list_for_each_entry_safe(req, tmp, &reqs, wb_list) { + if (!nfs_pageio_add_request(&desc, req)) { + nfs_list_add_request(req, &failed); + spin_lock(cinfo.lock); + dreq->flags = 0; + dreq->error = -EIO; + spin_unlock(cinfo.lock); + } } + nfs_pageio_complete(&desc); - if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, inode); -} - -static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) -{ - struct nfs_write_data *data = calldata; + while (!list_empty(&failed)) + nfs_unlock_and_release_request(req); - /* Call the NFS version-specific code */ - NFS_PROTO(data->inode)->commit_done(task, data); + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, dreq->inode); } -static void nfs_direct_commit_release(void *calldata) +static void nfs_direct_commit_complete(struct nfs_commit_data *data) { - struct nfs_write_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + struct nfs_direct_req *dreq = data->dreq; + struct nfs_commit_info cinfo; + struct nfs_page *req; int status = data->task.tk_status; + nfs_init_cinfo_from_dreq(&cinfo, dreq); if (status < 0) { dprintk("NFS: %5u commit failed with error %d.\n", - data->task.tk_pid, status); + data->task.tk_pid, status); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); @@ -546,62 +518,47 @@ static void nfs_direct_commit_release(void *calldata) } dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); - nfs_direct_write_complete(dreq, data->inode); - nfs_commit_free(data); + while (!list_empty(&data->pages)) { + req = nfs_list_entry(data->pages.next); + nfs_list_remove_request(req); + if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { + /* Note the rewrite will go through mds */ + kref_get(&req->wb_kref); + nfs_mark_request_commit(req, NULL, &cinfo); + } + nfs_unlock_and_release_request(req); + } + + if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) + nfs_direct_write_complete(dreq, data->inode); } -static const struct rpc_call_ops nfs_commit_direct_ops = { - .rpc_call_prepare = nfs_write_prepare, - .rpc_call_done = nfs_direct_commit_result, - .rpc_release = nfs_direct_commit_release, +static void nfs_direct_error_cleanup(struct nfs_inode *nfsi) +{ + /* There is no lock to clear */ +} + +static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { + .completion = nfs_direct_commit_complete, + .error_cleanup = nfs_direct_error_cleanup, }; static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) { - struct nfs_write_data *data = dreq->commit_data; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = dreq->ctx->cred, - }; - struct rpc_task_setup task_setup_data = { - .task = &data->task, - .rpc_client = NFS_CLIENT(dreq->inode), - .rpc_message = &msg, - .callback_ops = &nfs_commit_direct_ops, - .callback_data = data, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, - }; - - data->inode = dreq->inode; - data->cred = msg.rpc_cred; - - data->args.fh = NFS_FH(data->inode); - data->args.offset = 0; - data->args.count = 0; - data->args.context = dreq->ctx; - data->args.lock_context = dreq->l_ctx; - data->res.count = 0; - data->res.fattr = &data->fattr; - data->res.verf = &data->verf; - nfs_fattr_init(&data->fattr); - - NFS_PROTO(data->inode)->commit_setup(data, &msg); - - /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ - dreq->commit_data = NULL; - - dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); - - task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task(task); + int res; + struct nfs_commit_info cinfo; + LIST_HEAD(mds_list); + + nfs_init_cinfo_from_dreq(&cinfo, dreq); + nfs_scan_commit(dreq->inode, &mds_list, &cinfo); + res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo); + if (res < 0) /* res == -ENOMEM */ + nfs_direct_write_reschedule(dreq); } -static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +static void nfs_direct_write_schedule_work(struct work_struct *work) { + struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work); int flags = dreq->flags; dreq->flags = 0; @@ -613,89 +570,32 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode nfs_direct_write_reschedule(dreq); break; default: - if (dreq->commit_data != NULL) - nfs_commit_free(dreq->commit_data); - nfs_direct_free_writedata(dreq); - nfs_zap_mapping(inode, inode->i_mapping); + nfs_inode_dio_write_done(dreq->inode); nfs_direct_complete(dreq); } } -static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { - dreq->commit_data = nfs_commitdata_alloc(); - if (dreq->commit_data != NULL) - dreq->commit_data->req = (struct nfs_page *) dreq; + schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ } + #else -static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +static void nfs_direct_write_schedule_work(struct work_struct *work) { - dreq->commit_data = NULL; } static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { - nfs_direct_free_writedata(dreq); - nfs_zap_mapping(inode, inode->i_mapping); + nfs_inode_dio_write_done(inode); nfs_direct_complete(dreq); } #endif -static void nfs_direct_write_result(struct rpc_task *task, void *calldata) -{ - struct nfs_write_data *data = calldata; - - nfs_writeback_done(task, data); -} - /* * NB: Return the value of the first error return code. Subsequent * errors after the first one are ignored. */ -static void nfs_direct_write_release(void *calldata) -{ - struct nfs_write_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - int status = data->task.tk_status; - - spin_lock(&dreq->lock); - - if (unlikely(status < 0)) { - /* An error has occurred, so we should not commit */ - dreq->flags = 0; - dreq->error = status; - } - if (unlikely(dreq->error != 0)) - goto out_unlock; - - dreq->count += data->res.count; - - if (data->res.verf->committed != NFS_FILE_SYNC) { - switch (dreq->flags) { - case 0: - memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf)); - dreq->flags = NFS_ODIRECT_DO_COMMIT; - break; - case NFS_ODIRECT_DO_COMMIT: - if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) { - dprintk("NFS: %5u write verify failed\n", data->task.tk_pid); - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - } - } - } -out_unlock: - spin_unlock(&dreq->lock); - - if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, data->inode); -} - -static const struct rpc_call_ops nfs_write_direct_ops = { - .rpc_call_prepare = nfs_write_prepare, - .rpc_call_done = nfs_direct_write_result, - .rpc_release = nfs_direct_write_release, -}; - /* * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE * operation. If nfs_writedata_alloc() or get_user_pages() fails, @@ -703,132 +603,189 @@ static const struct rpc_call_ops nfs_write_direct_ops = { * handled automatically by nfs_direct_write_result(). Otherwise, if * no requests have been sent, just return an error. */ -static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, +static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, - loff_t pos, int sync) + loff_t pos) { + struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; unsigned long user_addr = (unsigned long)iov->iov_base; size_t count = iov->iov_len; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_cred = ctx->cred, - }; - struct rpc_task_setup task_setup_data = { - .rpc_client = NFS_CLIENT(inode), - .rpc_message = &msg, - .callback_ops = &nfs_write_direct_ops, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, - }; size_t wsize = NFS_SERVER(inode)->wsize; unsigned int pgbase; int result; ssize_t started = 0; + struct page **pagevec = NULL; + unsigned int npages; do { - struct nfs_write_data *data; size_t bytes; + int i; pgbase = user_addr & ~PAGE_MASK; - bytes = min(wsize,count); + bytes = min(max_t(size_t, wsize, PAGE_SIZE), count); result = -ENOMEM; - data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes)); - if (unlikely(!data)) + npages = nfs_page_array_len(pgbase, bytes); + if (!pagevec) + pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL); + if (!pagevec) break; down_read(¤t->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - data->npages, 0, 0, data->pagevec, NULL); + npages, 0, 0, pagevec, NULL); up_read(¤t->mm->mmap_sem); - if (result < 0) { - nfs_writedata_free(data); + if (result < 0) break; - } - if ((unsigned)result < data->npages) { + + if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { - nfs_direct_release_pages(data->pagevec, result); - nfs_writedata_free(data); + nfs_direct_release_pages(pagevec, result); break; } bytes -= pgbase; - data->npages = result; + npages = result; } - get_dreq(dreq); - - list_move_tail(&data->pages, &dreq->rewrite_list); - - data->req = (struct nfs_page *) dreq; - data->inode = inode; - data->cred = msg.rpc_cred; - data->args.fh = NFS_FH(inode); - data->args.context = ctx; - data->args.lock_context = dreq->l_ctx; - data->args.offset = pos; - data->args.pgbase = pgbase; - data->args.pages = data->pagevec; - data->args.count = bytes; - data->args.stable = sync; - data->res.fattr = &data->fattr; - data->res.count = bytes; - data->res.verf = &data->verf; - nfs_fattr_init(&data->fattr); - - task_setup_data.task = &data->task; - task_setup_data.callback_data = data; - msg.rpc_argp = &data->args; - msg.rpc_resp = &data->res; - NFS_PROTO(inode)->write_setup(data, &msg); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - break; - rpc_put_task(task); - - dprintk("NFS: %5u initiated direct write call " - "(req %s/%Ld, %zu bytes @ offset %Lu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - bytes, - (unsigned long long)data->args.offset); + for (i = 0; i < npages; i++) { + struct nfs_page *req; + unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); - started += bytes; - user_addr += bytes; - pos += bytes; - - /* FIXME: Remove this useless math from the final patch */ - pgbase += bytes; - pgbase &= ~PAGE_MASK; - BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); + req = nfs_create_request(dreq->ctx, dreq->inode, + pagevec[i], + pgbase, req_len); + if (IS_ERR(req)) { + result = PTR_ERR(req); + break; + } + nfs_lock_request(req); + req->wb_index = pos >> PAGE_SHIFT; + req->wb_offset = pos & ~PAGE_MASK; + if (!nfs_pageio_add_request(desc, req)) { + result = desc->pg_error; + nfs_unlock_and_release_request(req); + break; + } + pgbase = 0; + bytes -= req_len; + started += req_len; + user_addr += req_len; + pos += req_len; + count -= req_len; + } + /* The nfs_page now hold references to these pages */ + nfs_direct_release_pages(pagevec, npages); + } while (count != 0 && result >= 0); - count -= bytes; - } while (count != 0); + kfree(pagevec); if (started) return started; return result < 0 ? (ssize_t) result : -EFAULT; } +static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) +{ + struct nfs_direct_req *dreq = hdr->dreq; + struct nfs_commit_info cinfo; + int bit = -1; + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + + if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) + goto out_put; + + nfs_init_cinfo_from_dreq(&cinfo, dreq); + + spin_lock(&dreq->lock); + + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { + dreq->flags = 0; + dreq->error = hdr->error; + } + if (dreq->error != 0) + bit = NFS_IOHDR_ERROR; + else { + dreq->count += hdr->good_bytes; + if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + bit = NFS_IOHDR_NEED_RESCHED; + } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) + bit = NFS_IOHDR_NEED_RESCHED; + else if (dreq->flags == 0) { + memcpy(&dreq->verf, &req->wb_verf, + sizeof(dreq->verf)); + bit = NFS_IOHDR_NEED_COMMIT; + dreq->flags = NFS_ODIRECT_DO_COMMIT; + } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { + if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) { + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + bit = NFS_IOHDR_NEED_RESCHED; + } else + bit = NFS_IOHDR_NEED_COMMIT; + } + } + } + spin_unlock(&dreq->lock); + + while (!list_empty(&hdr->pages)) { + req = nfs_list_entry(hdr->pages.next); + nfs_list_remove_request(req); + switch (bit) { + case NFS_IOHDR_NEED_RESCHED: + case NFS_IOHDR_NEED_COMMIT: + kref_get(&req->wb_kref); + nfs_mark_request_commit(req, hdr->lseg, &cinfo); + } + nfs_unlock_and_release_request(req); + } + +out_put: + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, hdr->inode); + hdr->release(hdr); +} + +static void nfs_write_sync_pgio_error(struct list_head *head) +{ + struct nfs_page *req; + + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_unlock_and_release_request(req); + } +} + +static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { + .error_cleanup = nfs_write_sync_pgio_error, + .init_hdr = nfs_direct_pgio_init, + .completion = nfs_direct_write_completion, +}; + static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, const struct iovec *iov, unsigned long nr_segs, - loff_t pos, int sync) + loff_t pos) { + struct nfs_pageio_descriptor desc; + struct inode *inode = dreq->inode; ssize_t result = 0; size_t requested_bytes = 0; unsigned long seg; + nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, + &nfs_direct_write_completion_ops); + desc.pg_dreq = dreq; get_dreq(dreq); + atomic_inc(&inode->i_dio_count); for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_write_schedule_segment(dreq, vec, - pos, sync); + result = nfs_direct_write_schedule_segment(&desc, vec, pos); if (result < 0) break; requested_bytes += result; @@ -836,12 +793,15 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, break; pos += vec->iov_len; } + nfs_pageio_complete(&desc); + NFS_I(dreq->inode)->write_io += desc.pg_bytes_written; /* * If no bytes were started, return the error, and let the * generic layer handle the completion. */ if (requested_bytes == 0) { + inode_dio_done(inode); nfs_direct_req_release(dreq); return result < 0 ? result : -EIO; } @@ -858,16 +818,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; struct nfs_direct_req *dreq; - size_t wsize = NFS_SERVER(inode)->wsize; - int sync = NFS_UNSTABLE; dreq = nfs_direct_req_alloc(); if (!dreq) goto out; - nfs_alloc_commit_data(dreq); - - if (dreq->commit_data == NULL || count <= wsize) - sync = NFS_FILE_SYNC; dreq->inode = inode; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); @@ -877,7 +831,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); + result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos); if (!result) result = nfs_direct_wait(dreq); out_release: @@ -997,10 +951,15 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, task_io_account_write(count); retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); + if (retval > 0) { + struct inode *inode = mapping->host; - if (retval > 0) iocb->ki_pos = pos + retval; - + spin_lock(&inode->i_lock); + if (i_size_read(inode) < iocb->ki_pos) + i_size_write(inode, iocb->ki_pos); + spin_unlock(&inode->i_lock); + } out: return retval; } |